Initial commit

This commit is contained in:
yichuan520030910320
2025-06-30 09:05:05 +00:00
commit 46f6cc100b
1231 changed files with 278432 additions and 0 deletions

View File

@@ -0,0 +1,101 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexFlat.h>
// 64-bit int
using idx_t = faiss::idx_t;
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = distrib(rng);
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = distrib(rng);
xq[d * i] += i / 1000.;
}
faiss::IndexFlatL2 index(d); // call constructor
printf("is_trained = %s\n", index.is_trained ? "true" : "false");
index.add(nb, xb); // add vectors to the index
printf("ntotal = %zd\n", index.ntotal);
int k = 4;
{ // sanity check: search 5 first vectors of xb
idx_t* I = new idx_t[k * 5];
float* D = new float[k * 5];
index.search(5, xb, k, D, I);
// print results
printf("I=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%5zd ", I[i * k + j]);
printf("\n");
}
printf("D=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%7g ", D[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
{ // search xq
idx_t* I = new idx_t[k * nq];
float* D = new float[k * nq];
index.search(nq, xq, k, D, I);
// print results
printf("I (5 first results)=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%5zd ", I[i * k + j]);
printf("\n");
}
printf("D (5 last results)=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5f ", D[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,79 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFFlat.h>
using idx_t = faiss::idx_t;
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = distrib(rng);
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = distrib(rng);
xq[d * i] += i / 1000.;
}
int nlist = 100;
int k = 4;
faiss::IndexFlatL2 quantizer(d); // the other index
faiss::IndexIVFFlat index(&quantizer, d, nlist);
assert(!index.is_trained);
index.train(nb, xb);
assert(index.is_trained);
index.add(nb, xb);
{ // search xq
idx_t* I = new idx_t[k * nq];
float* D = new float[k * nq];
index.search(nq, xq, k, D, I);
printf("I=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5zd ", I[i * k + j]);
printf("\n");
}
printf("D=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5f ", D[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexFlat.h>
#include <faiss/IndexIVFPQ.h>
using idx_t = faiss::idx_t;
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = distrib(rng);
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = distrib(rng);
xq[d * i] += i / 1000.;
}
int nlist = 100;
int k = 4;
int m = 8; // bytes per vector
faiss::IndexFlatL2 quantizer(d); // the other index
faiss::IndexIVFPQ index(&quantizer, d, nlist, m, 8);
index.train(nb, xb);
index.add(nb, xb);
{ // sanity check
idx_t* I = new idx_t[k * 5];
float* D = new float[k * 5];
index.search(5, xb, k, D, I);
printf("I=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%5zd ", I[i * k + j]);
printf("\n");
}
printf("D=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%7g ", D[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
{ // search xq
idx_t* I = new idx_t[k * nq];
float* D = new float[k * nq];
index.nprobe = 10;
index.search(nq, xq, k, D, I);
printf("I=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5zd ", I[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,120 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexFlat.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/GpuIndexIVFFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = distrib(rng);
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = distrib(rng);
xq[d * i] += i / 1000.;
}
faiss::gpu::StandardGpuResources res;
// Using a flat index
faiss::gpu::GpuIndexFlatL2 index_flat(&res, d);
printf("is_trained = %s\n", index_flat.is_trained ? "true" : "false");
index_flat.add(nb, xb); // add vectors to the index
printf("ntotal = %ld\n", index_flat.ntotal);
int k = 4;
{ // search xq
long* I = new long[k * nq];
float* D = new float[k * nq];
index_flat.search(nq, xq, k, D, I);
// print results
printf("I (5 first results)=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
printf("I (5 last results)=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
// Using an IVF index
int nlist = 100;
faiss::gpu::GpuIndexIVFFlat index_ivf(&res, d, nlist, faiss::METRIC_L2);
assert(!index_ivf.is_trained);
index_ivf.train(nb, xb);
assert(index_ivf.is_trained);
index_ivf.add(nb, xb); // add vectors to the index
printf("is_trained = %s\n", index_ivf.is_trained ? "true" : "false");
printf("ntotal = %ld\n", index_ivf.ntotal);
{ // search xq
long* I = new long[k * nq];
float* D = new float[k * nq];
index_ivf.search(nq, xq, k, D, I);
// print results
printf("I (5 first results)=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
printf("I (5 last results)=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,99 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexFlat.h>
#include <faiss/gpu/GpuAutoTune.h>
#include <faiss/gpu/GpuCloner.h>
#include <faiss/gpu/GpuIndexFlat.h>
#include <faiss/gpu/StandardGpuResources.h>
#include <faiss/gpu/utils/DeviceUtils.h>
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = distrib(rng);
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = distrib(rng);
xq[d * i] += i / 1000.;
}
int ngpus = faiss::gpu::getNumDevices();
printf("Number of GPUs: %d\n", ngpus);
std::vector<faiss::gpu::GpuResourcesProvider*> res;
std::vector<int> devs;
for (int i = 0; i < ngpus; i++) {
res.push_back(new faiss::gpu::StandardGpuResources);
devs.push_back(i);
}
faiss::IndexFlatL2 cpu_index(d);
faiss::Index* gpu_index =
faiss::gpu::index_cpu_to_gpu_multiple(res, devs, &cpu_index);
printf("is_trained = %s\n", gpu_index->is_trained ? "true" : "false");
gpu_index->add(nb, xb); // add vectors to the index
printf("ntotal = %ld\n", gpu_index->ntotal);
int k = 4;
{ // search xq
long* I = new long[k * nq];
float* D = new float[k * nq];
gpu_index->search(nq, xq, k, D, I);
// print results
printf("I (5 first results)=\n");
for (int i = 0; i < 5; i++) {
for (int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
printf("I (5 last results)=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5ld ", I[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
delete gpu_index;
for (int i = 0; i < ngpus; i++) {
delete res[i];
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,73 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexHNSW.h>
using idx_t = faiss::idx_t;
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[d * nb];
float* xq = new float[d * nq];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++)
xb[d * i + j] = distrib(rng);
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++)
xq[d * i + j] = distrib(rng);
xq[d * i] += i / 1000.;
}
int k = 4;
faiss::IndexHNSWFlat index(d, 32);
index.add(nb, xb);
{ // search xq
idx_t* I = new idx_t[k * nq];
float* D = new float[k * nq];
index.search(nq, xq, k, D, I);
printf("I=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5zd ", I[i * k + j]);
printf("\n");
}
printf("D=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++)
printf("%5f ", D[i * k + j]);
printf("\n");
}
delete[] I;
delete[] D;
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,75 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexPQFastScan.h>
using idx_t = faiss::idx_t;
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[(int)(d * nb)];
float* xq = new float[(int)(d * nq)];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++) {
xb[d * i + j] = distrib(rng);
}
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++) {
xq[d * i + j] = distrib(rng);
}
xq[d * i] += i / 1000.;
}
int m = 8;
int n_bit = 4;
faiss::IndexPQFastScan index(d, m, n_bit);
printf("Index is trained? %s\n", index.is_trained ? "true" : "false");
index.train(nb, xb);
printf("Index is trained? %s\n", index.is_trained ? "true" : "false");
index.add(nb, xb);
int k = 4;
{ // search xq
idx_t* I = new idx_t[(int)(k * nq)];
float* D = new float[(int)(k * nq)];
index.search(nq, xq, k, D, I);
printf("I=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++) {
printf("%5zd ", I[i * k + j]);
}
printf("\n");
}
delete[] I;
delete[] D;
}
delete[] xb;
delete[] xq;
return 0;
} // namespace facebook::detail

View File

@@ -0,0 +1,84 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexPQFastScan.h>
#include <faiss/IndexRefine.h>
using idx_t = faiss::idx_t;
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[(int)(d * nb)];
float* xq = new float[(int)(d * nq)];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++) {
xb[d * i + j] = distrib(rng);
}
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++) {
xq[d * i + j] = distrib(rng);
}
xq[d * i] += i / 1000.;
}
int m = 8;
int n_bit = 4;
faiss::IndexPQFastScan index(d, m, n_bit);
faiss::IndexRefineFlat index_refine(&index);
// refine index after PQFastScan
printf("Index is trained? %s\n",
index_refine.is_trained ? "true" : "false");
index_refine.train(nb, xb);
printf("Index is trained? %s\n",
index_refine.is_trained ? "true" : "false");
index_refine.add(nb, xb);
int k = 4;
{ // search xq
idx_t* I = new idx_t[(int)(k * nq)];
float* D = new float[(int)(k * nq)];
float k_factor = 3;
faiss::IndexRefineSearchParameters* params =
new faiss::IndexRefineSearchParameters();
params->k_factor = k_factor;
index_refine.search(nq, xq, k, D, I, params);
printf("I=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++) {
printf("%5zd ", I[i * k + j]);
}
printf("\n");
}
delete[] I;
delete[] D;
delete params;
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,104 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <faiss/IndexPQFastScan.h>
#include <faiss/IndexRefine.h>
#include <faiss/index_factory.h>
using idx_t = faiss::idx_t;
int main() {
int d = 64; // dimension
int nb = 100000; // database size
int nq = 10000; // nb of queries
std::mt19937 rng;
std::uniform_real_distribution<> distrib;
float* xb = new float[(int)(d * nb)];
float* xq = new float[(int)(d * nq)];
for (int i = 0; i < nb; i++) {
for (int j = 0; j < d; j++) {
xb[d * i + j] = distrib(rng);
}
xb[d * i] += i / 1000.;
}
for (int i = 0; i < nq; i++) {
for (int j = 0; j < d; j++) {
xq[d * i + j] = distrib(rng);
}
xq[d * i] += i / 1000.;
}
// Constructing the refine PQ index with SQfp16 with index factory
faiss::Index* index_fp16;
index_fp16 = faiss::index_factory(
d, "PQ32x4fs,Refine(SQfp16)", faiss::METRIC_L2);
index_fp16->train(nb, xb);
index_fp16->add(nb, xb);
// Constructing the refine PQ index with SQ8
faiss::Index* index_sq8;
index_sq8 =
faiss::index_factory(d, "PQ32x4fs,Refine(SQ8)", faiss::METRIC_L2);
index_sq8->train(nb, xb);
index_sq8->add(nb, xb);
int k = 10;
{ // search xq
idx_t* I_fp16 = new idx_t[(int)(k * nq)];
float* D_fp16 = new float[(int)(k * nq)];
idx_t* I_sq8 = new idx_t[(int)(k * nq)];
float* D_sq8 = new float[(int)(k * nq)];
// Parameterization on k factor while doing search for index refinement
float k_factor = 3;
faiss::IndexRefineSearchParameters* params =
new faiss::IndexRefineSearchParameters();
params->k_factor = k_factor;
// Perform index search using different index refinement
index_fp16->search(nq, xq, k, D_fp16, I_fp16, params);
index_sq8->search(nq, xq, k, D_sq8, I_sq8, params);
printf("I_fp16=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++) {
printf("%5zd ", I_fp16[i * k + j]);
}
printf("\n");
}
printf("I_sq8=\n");
for (int i = nq - 5; i < nq; i++) {
for (int j = 0; j < k; j++) {
printf("%5zd ", I_sq8[i * k + j]);
}
printf("\n");
}
delete[] I_fp16;
delete[] D_fp16;
delete[] I_sq8;
delete[] D_sq8;
delete params;
delete index_fp16;
delete index_sq8;
}
delete[] xb;
delete[] xq;
return 0;
}

View File

@@ -0,0 +1,31 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# Every tutorial <name> is built from <name>.cpp into an executable <name>
# linked against faiss. EXCLUDE_FROM_ALL keeps these targets out of the
# default build; they are only built when requested by name.
foreach(tutorial_name
        1-Flat
        2-IVFFlat
        3-IVFPQ
        4-GPU
        5-Multiple-GPUs
        6-HNSW
        7-PQFastScan
        8-PQFastScanRefine
        9-RefineComparison)
  add_executable(${tutorial_name} EXCLUDE_FROM_ALL ${tutorial_name}.cpp)
  target_link_libraries(${tutorial_name} PRIVATE faiss)
endforeach()

View File

@@ -0,0 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np

# Problem size.
d = 64                                   # dimension
nb = 100000                              # database size
nq = 10000                               # nb of queries

np.random.seed(1234)                     # make reproducible

# Database is drawn before the queries; the order fixes the generated data.
xb = np.random.random((nb, d)).astype(np.float32)
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.

import faiss                             # make faiss available

index = faiss.IndexFlatL2(d)             # build the index
print(index.is_trained)
index.add(xb)                            # add vectors to the index
print(index.ntotal)

k = 4                                    # we want to see 4 nearest neighbors

# Sanity check: query with the first 5 database vectors themselves.
check_dists, check_ids = index.search(xb[:5], k)
print(check_ids)
print(check_dists)

# Actual search over all queries; only the ids are printed.
_, query_ids = index.search(xq, k)
print(query_ids[:5])                     # neighbors of the 5 first queries
print(query_ids[-5:])                    # neighbors of the 5 last queries

View File

@@ -0,0 +1,34 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np

# Problem size.
d = 64                                   # dimension
nb = 100000                              # database size
nq = 10000                               # nb of queries

np.random.seed(1234)                     # make reproducible

# Database is drawn before the queries; the order fixes the generated data.
xb = np.random.random((nb, d)).astype(np.float32)
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.

import faiss

nlist = 100
k = 4

quantizer = faiss.IndexFlatL2(d)         # the other index
# The metric is passed explicitly so the search uses L2 distance.
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)

# The index must be trained before vectors can be added.
assert not index.is_trained
index.train(xb)
assert index.is_trained

index.add(xb)                            # add may be a bit slower as well

# First search with the index's initial nprobe.
_, ids_initial = index.search(xq, k)
print(ids_initial[-5:])                  # neighbors of the 5 last queries

# Same search again after raising nprobe.
index.nprobe = 10                        # default nprobe is 1, try a few more
_, ids_more_probes = index.search(xq, k)
print(ids_more_probes[-5:])              # neighbors of the 5 last queries

View File

@@ -0,0 +1,32 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np

# Problem size.
d = 64                                   # dimension
nb = 100000                              # database size
nq = 10000                               # nb of queries

np.random.seed(1234)                     # make reproducible

# Database is drawn before the queries; the order fixes the generated data.
xb = np.random.random((nb, d)).astype(np.float32)
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.

import faiss

nlist = 100
m = 8
k = 4

quantizer = faiss.IndexFlatL2(d)         # this remains the same
# The final argument (8) specifies that each sub-vector is encoded as 8 bits.
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, 8)

index.train(xb)
index.add(xb)

# Sanity check: query with the first 5 database vectors themselves.
check_dists, check_ids = index.search(xb[:5], k)
print(check_ids)
print(check_dists)

index.nprobe = 10                        # make comparable with experiment above
_, query_ids = index.search(xq, k)       # search
print(query_ids[-5:])

View File

@@ -0,0 +1,57 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np

# Problem size.
d = 64                                   # dimension
nb = 100000                              # database size
nq = 10000                               # nb of queries

np.random.seed(1234)                     # make reproducible

# Database is drawn before the queries; the order fixes the generated data.
xb = np.random.random((nb, d)).astype(np.float32)
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.

import faiss                             # make faiss available

res = faiss.StandardGpuResources()       # use a single GPU

## Using a flat index

index_flat = faiss.IndexFlatL2(d)        # build a flat (CPU) index

# make it a flat GPU index (device 0)
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)

gpu_index_flat.add(xb)                   # add vectors to the index
print(gpu_index_flat.ntotal)

k = 4                                    # we want to see 4 nearest neighbors
_, flat_ids = gpu_index_flat.search(xq, k)   # actual search
print(flat_ids[:5])                      # neighbors of the 5 first queries
print(flat_ids[-5:])                     # neighbors of the 5 last queries

## Using an IVF index

nlist = 100
quantizer = faiss.IndexFlatL2(d)         # the other index
# The metric is passed explicitly so the search uses L2 distance.
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)

# make it an IVF GPU index (device 0)
gpu_index_ivf = faiss.index_cpu_to_gpu(res, 0, index_ivf)

# The IVF index must be trained before vectors can be added.
assert not gpu_index_ivf.is_trained
gpu_index_ivf.train(xb)                  # train on the database vectors
assert gpu_index_ivf.is_trained

gpu_index_ivf.add(xb)                    # add vectors to the index
print(gpu_index_ivf.ntotal)

k = 4                                    # we want to see 4 nearest neighbors
_, ivf_ids = gpu_index_ivf.search(xq, k)     # actual search
print(ivf_ids[:5])                       # neighbors of the 5 first queries
print(ivf_ids[-5:])                      # neighbors of the 5 last queries

View File

@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import numpy as np

# Problem size.
d = 64                                   # dimension
nb = 100000                              # database size
nq = 10000                               # nb of queries

np.random.seed(1234)                     # make reproducible

# Database is drawn before the queries; the order fixes the generated data.
xb = np.random.random((nb, d)).astype(np.float32)
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype(np.float32)
xq[:, 0] += np.arange(nq) / 1000.

import faiss                             # make faiss available

ngpus = faiss.get_num_gpus()
print("number of GPUs:", ngpus)

# Start from a CPU flat index and clone it onto all GPUs.
cpu_index = faiss.IndexFlatL2(d)
gpu_index = faiss.index_cpu_to_all_gpus(cpu_index)   # build the index

gpu_index.add(xb)                        # add vectors to the index
print(gpu_index.ntotal)

k = 4                                    # we want to see 4 nearest neighbors
_, query_ids = gpu_index.search(xq, k)   # actual search
print(query_ids[:5])                     # neighbors of the 5 first queries
print(query_ids[-5:])                    # neighbors of the 5 last queries

View File

@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import faiss
import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible

# Database vectors (nb x d) are drawn first, then the query vectors (nq x d).
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

m = 8      # number of sub-vectors each vector is split into
k = 4      # number of nearest neighbors returned per query
n_bit = 4  # each sub-vector is encoded as 4 bits
bbs = 32   # build block size ( bbs % 32 == 0 ) for PQ

# construct FastScan Index
index = faiss.IndexPQFastScan(d, m, n_bit, faiss.METRIC_L2, bbs)

assert not index.is_trained
index.train(xb)  # train the index on the synthetic database vectors
assert index.is_trained

index.add(xb)

D, I = index.search(xb[:5], k)  # sanity check
print(I)
print(D)

# No nprobe is set here: nprobe is a parameter of IVF indexes, and this index
# has no inverted lists, so assigning it would not change the search.
D, I = index.search(xq, k)     # search
print(I[-5:])                  # neighbors of the 5 last queries

View File

@@ -0,0 +1,38 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import faiss
import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible

# Database vectors (nb x d) are drawn first, then the query vectors (nq x d).
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

m = 8      # number of sub-vectors each vector is split into
k = 4      # number of nearest neighbors returned by the un-refined search
n_bit = 4  # each sub-vector is encoded as 4 bits
bbs = 32   # build block size ( bbs % 32 == 0 ) for PQ

# construct FastScan and run index refinement
index = faiss.IndexPQFastScan(d, m, n_bit, faiss.METRIC_L2, bbs)
index_refine = faiss.IndexRefineFlat(index)

assert not index_refine.is_trained
index_refine.train(xb)  # train the index on the synthetic database vectors
assert index_refine.is_trained

index_refine.add(xb)

# Refined search: 10 neighbors for the first 5 queries, with k_factor = 3.
params = faiss.IndexRefineSearchParameters(k_factor=3)
D, I = index_refine.search(xq[:5], 10, params=params)
print(I)
print(D)

# Same 5 queries on the underlying FastScan index, without refinement.
# No nprobe is set: it is a parameter of IVF indexes, and this index has no
# inverted lists, so assigning it would not change the search.
D, I = index.search(xq[:5], k)
print(I[-5:])

View File

@@ -0,0 +1,42 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import faiss
from faiss.contrib.evaluation import knn_intersection_measure
from faiss.contrib import datasets

# 64-dim vectors, 50000 vectors in the training, 100000 in database,
# 10000 in queries, dtype ('float32')
ds = datasets.SyntheticDataset(64, 50000, 100000, 10000)
d = 64                           # dimension

train_vectors = ds.get_train()
database_vectors = ds.get_database()

# Constructing the refine PQ index with SQfp16 with index factory
index_fp16 = faiss.index_factory(d, 'PQ32x4fs,Refine(SQfp16)')
index_fp16.train(train_vectors)
index_fp16.add(database_vectors)

# Constructing the refine PQ index with SQ8
index_sq8 = faiss.index_factory(d, 'PQ32x4fs,Refine(SQ8)')
index_sq8.train(train_vectors)
index_sq8.add(database_vectors)

# Parameterization on k factor while doing search for index refinement;
# the same parameters are used for both indexes.
params = faiss.IndexRefineSearchParameters(k_factor=3.0)

# Perform index search using different index refinement
queries = ds.get_queries()
D_fp16, I_fp16 = index_fp16.search(queries, 100, params=params)
D_sq8, I_sq8 = index_sq8.search(queries, 100, params=params)

# Calculating knn intersection measure for different index types on refinement
ground_truth = ds.get_groundtruth()
KIM_fp16 = knn_intersection_measure(I_fp16, ground_truth)
KIM_sq8 = knn_intersection_measure(I_sq8, ground_truth)

# KNN intersection measure accuracy shows that choosing SQ8 impacts accuracy
assert KIM_fp16 > KIM_sq8

print(I_sq8[:5])
print(I_fp16[:5])