Initial commit
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/1-Flat.py (vendored, new file, 29 lines)
@@ -0,0 +1,29 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

import faiss                     # make faiss available
index = faiss.IndexFlatL2(d)     # build the index
print(index.is_trained)
index.add(xb)                    # add vectors to the index
print(index.ntotal)

k = 4                            # we want to see 4 nearest neighbors
D, I = index.search(xb[:5], k)   # sanity check
print(I)
print(D)
D, I = index.search(xq, k)       # actual search
print(I[:5])                     # neighbors of the 5 first queries
print(I[-5:])                    # neighbors of the 5 last queries
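
Aside (not part of the vendored file): the distances returned in D by IndexFlatL2 are squared L2 distances, and a populated index can be saved and reloaded for later use. A minimal sketch, assuming the index variable built above:

import faiss

faiss.write_index(index, "flat.index")    # serialize the populated index to disk
index2 = faiss.read_index("flat.index")   # reload it later
assert index2.ntotal == index.ntotal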
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/2-IVFFlat.py (vendored, new file, 34 lines)
@@ -0,0 +1,34 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

import faiss

nlist = 100
k = 4
quantizer = faiss.IndexFlatL2(d)  # the other index
index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
# here we specify METRIC_L2, by default it performs inner-product search

assert not index.is_trained
index.train(xb)
assert index.is_trained

index.add(xb)                    # add may be a bit slower as well
D, I = index.search(xq, k)       # actual search
print(I[-5:])                    # neighbors of the 5 last queries
index.nprobe = 10                # default nprobe is 1, try a few more
D, I = index.search(xq, k)
print(I[-5:])                    # neighbors of the 5 last queries
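
Aside (not part of the vendored file): nprobe is the main speed/recall knob for IVF indexes. A hedged sketch of sweeping it and measuring 1-recall@k against the exact IndexFlatL2 baseline, assuming the d, xb, xq, nq, k, and index variables defined above:

import numpy as np
import faiss

flat = faiss.IndexFlatL2(d)              # exact baseline, as in 1-Flat.py
flat.add(xb)
_, I_ref = flat.search(xq, 1)            # true nearest neighbor of each query

for nprobe in (1, 4, 16, 64):
    index.nprobe = nprobe                # number of inverted lists visited per query
    _, I_ivf = index.search(xq, k)
    recall = float(np.mean([I_ref[i, 0] in I_ivf[i] for i in range(nq)]))
    print("nprobe=%d  1-recall@%d=%.3f" % (nprobe, k, recall))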
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/3-IVFPQ.py (vendored, new file, 32 lines)
@@ -0,0 +1,32 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

import faiss

nlist = 100
m = 8
k = 4
quantizer = faiss.IndexFlatL2(d)  # this remains the same
index = faiss.IndexIVFPQ(quantizer, d, nlist, m, 8)
# 8 specifies that each sub-vector is encoded as 8 bits
index.train(xb)
index.add(xb)
D, I = index.search(xb[:5], k)   # sanity check
print(I)
print(D)
index.nprobe = 10                # make comparable with experiment above
D, I = index.search(xq, k)       # search
print(I[-5:])
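
Aside (not part of the vendored file): with m = 8 sub-quantizers at 8 bits each, every database vector is stored as an 8-byte PQ code instead of 64 float32 values. A back-of-the-envelope sketch, assuming the d and index variables above and that the wrapped ProductQuantizer is exposed as index.pq:

code_bytes = index.pq.M * index.pq.nbits // 8   # 8 sub-quantizers * 8 bits = 8 bytes per vector
raw_bytes = d * 4                               # 64 float32 values = 256 bytes per vector
print("compression ratio: ~%dx" % (raw_bytes // code_bytes))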
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/4-GPU.py (vendored, new file, 57 lines)
@@ -0,0 +1,57 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

import faiss                     # make faiss available

res = faiss.StandardGpuResources()  # use a single GPU

## Using a flat index

index_flat = faiss.IndexFlatL2(d)   # build a flat (CPU) index

# make it a flat GPU index
gpu_index_flat = faiss.index_cpu_to_gpu(res, 0, index_flat)

gpu_index_flat.add(xb)           # add vectors to the index
print(gpu_index_flat.ntotal)

k = 4                            # we want to see 4 nearest neighbors
D, I = gpu_index_flat.search(xq, k)  # actual search
print(I[:5])                     # neighbors of the 5 first queries
print(I[-5:])                    # neighbors of the 5 last queries


## Using an IVF index

nlist = 100
quantizer = faiss.IndexFlatL2(d)  # the other index
index_ivf = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
# here we specify METRIC_L2, by default it performs inner-product search

# make it an IVF GPU index
gpu_index_ivf = faiss.index_cpu_to_gpu(res, 0, index_ivf)

assert not gpu_index_ivf.is_trained
gpu_index_ivf.train(xb)          # train the index
assert gpu_index_ivf.is_trained

gpu_index_ivf.add(xb)            # add vectors to the index
print(gpu_index_ivf.ntotal)

k = 4                            # we want to see 4 nearest neighbors
D, I = gpu_index_ivf.search(xq, k)  # actual search
print(I[:5])                     # neighbors of the 5 first queries
print(I[-5:])                    # neighbors of the 5 last queries
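
Aside (not part of the vendored file): GPU indexes cannot be serialized directly; they are usually copied back to the host first. A minimal sketch, assuming gpu_index_flat from above:

import faiss

cpu_copy = faiss.index_gpu_to_cpu(gpu_index_flat)     # copy the index back to host memory
faiss.write_index(cpu_copy, "flat_from_gpu.index")    # now it can be written to disk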
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/5-Multiple-GPUs.py (vendored, new file, 35 lines)
@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

import faiss                     # make faiss available

ngpus = faiss.get_num_gpus()

print("number of GPUs:", ngpus)

cpu_index = faiss.IndexFlatL2(d)

gpu_index = faiss.index_cpu_to_all_gpus(  # build the index
    cpu_index
)

gpu_index.add(xb)                # add vectors to the index
print(gpu_index.ntotal)

k = 4                            # we want to see 4 nearest neighbors
D, I = gpu_index.search(xq, k)   # actual search
print(I[:5])                     # neighbors of the 5 first queries
print(I[-5:])                    # neighbors of the 5 last queries
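
Aside (not part of the vendored file): index_cpu_to_all_gpus replicates the index on every GPU by default; large databases are often sharded instead so each GPU holds only part of the vectors. A hedged sketch, assuming cpu_index and xb from above and the GpuMultipleClonerOptions cloner options:

import faiss

co = faiss.GpuMultipleClonerOptions()
co.shard = True                          # shard the database across GPUs instead of replicating it
gpu_index_sharded = faiss.index_cpu_to_all_gpus(cpu_index, co=co)
gpu_index_sharded.add(xb)
print(gpu_index_sharded.ntotal)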
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/7-PQFastScan.py (vendored, new file, 35 lines)
@@ -0,0 +1,35 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import faiss
import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')  # nb database vectors of dimension d
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

m = 8                            # number of sub-vectors (sub-quantizers)
k = 4                            # number of nearest neighbors to retrieve
n_bit = 4                        # each sub-vector is encoded as 4 bits
bbs = 32                         # build block size (bbs % 32 == 0) for PQ
index = faiss.IndexPQFastScan(d, m, n_bit, faiss.METRIC_L2, bbs)
# construct the FastScan index

assert not index.is_trained
index.train(xb)                  # train on the synthetic database vectors
assert index.is_trained

index.add(xb)
D, I = index.search(xb[:5], k)   # sanity check
print(I)
print(D)
index.nprobe = 10                # make comparable with experiment above
D, I = index.search(xq, k)       # search
print(I[-5:])                    # neighbors of the 5 last queries
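
Aside (not part of the vendored file): IndexPQFastScan stores 4-bit PQ codes in blocks of bbs vectors so that distance tables fit in SIMD registers; with m = 8 and n_bit = 4 each vector occupies 4 bytes. A quick arithmetic sketch, assuming the variables defined above:

code_bytes = m * n_bit / 8              # 8 sub-vectors * 4 bits = 4 bytes per vector
raw_bytes = d * 4                       # 64 float32 values = 256 bytes per vector
print("%.0f bytes per code vs %d bytes raw" % (code_bytes, raw_bytes))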
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/8-PQFastScanRefine.py (vendored, new file, 38 lines)
@@ -0,0 +1,38 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import faiss
import numpy as np

d = 64                           # dimension
nb = 100000                      # database size
nq = 10000                       # nb of queries
np.random.seed(1234)             # make reproducible
xb = np.random.random((nb, d)).astype('float32')  # nb database vectors of dimension d
xb[:, 0] += np.arange(nb) / 1000.
xq = np.random.random((nq, d)).astype('float32')
xq[:, 0] += np.arange(nq) / 1000.

m = 8                            # number of sub-vectors (sub-quantizers)
k = 4                            # number of nearest neighbors to retrieve
n_bit = 4                        # each sub-vector is encoded as 4 bits
bbs = 32                         # build block size (bbs % 32 == 0) for PQ

index = faiss.IndexPQFastScan(d, m, n_bit, faiss.METRIC_L2)
index_refine = faiss.IndexRefineFlat(index)
# construct the FastScan index and wrap it with an exact refinement stage

assert not index_refine.is_trained
index_refine.train(xb)           # train on the synthetic database vectors
assert index_refine.is_trained

index_refine.add(xb)
params = faiss.IndexRefineSearchParameters(k_factor=3)
D, I = index_refine.search(xq[:5], 10, params=params)
print(I)
print(D)
index.nprobe = 10                # make comparable with experiment above
D, I = index.search(xq[:5], k)   # search
print(I[-5:])
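
Aside (not part of the vendored file): IndexRefineFlat re-ranks the fast-scan candidates with exact distances, and k_factor controls how many candidates (k_factor * k) are fetched before re-ranking. A small sketch of varying it, assuming index_refine and xq from above:

import faiss

for k_factor in (1.0, 2.0, 4.0):
    params = faiss.IndexRefineSearchParameters(k_factor=k_factor)
    D, I = index_refine.search(xq[:5], 10, params=params)   # fetch k_factor * 10 candidates, re-rank exactly
    print(k_factor, I[0])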
packages/leann-backend-hnsw/third_party/faiss/tutorial/python/9-RefineComparison.py (vendored, new file, 42 lines)
@@ -0,0 +1,42 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import faiss

from faiss.contrib.evaluation import knn_intersection_measure
from faiss.contrib import datasets

# 64-dim vectors, 50000 vectors in the training set, 100000 in the database,
# 10000 queries, dtype 'float32'
ds = datasets.SyntheticDataset(64, 50000, 100000, 10000)
d = 64                           # dimension

# Construct the refined PQ index with an SQfp16 refinement stage via the index factory
index_fp16 = faiss.index_factory(d, 'PQ32x4fs,Refine(SQfp16)')
index_fp16.train(ds.get_train())
index_fp16.add(ds.get_database())

# Construct the refined PQ index with an SQ8 refinement stage
index_sq8 = faiss.index_factory(d, 'PQ32x4fs,Refine(SQ8)')
index_sq8.train(ds.get_train())
index_sq8.add(ds.get_database())

# Parameterize the k_factor used by the refinement stage during search
k_factor = 3.0
params = faiss.IndexRefineSearchParameters(k_factor=k_factor)

# Search with the two differently refined indexes
D_fp16, I_fp16 = index_fp16.search(ds.get_queries(), 100, params=params)
D_sq8, I_sq8 = index_sq8.search(ds.get_queries(), 100, params=params)

# Compute the kNN intersection measure for the two refinement variants
KIM_fp16 = knn_intersection_measure(I_fp16, ds.get_groundtruth())
KIM_sq8 = knn_intersection_measure(I_sq8, ds.get_groundtruth())

# The coarser SQ8 refinement reduces accuracy relative to SQfp16
assert KIM_fp16 > KIM_sq8

print(I_sq8[:5])
print(I_fp16[:5])
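
Aside (not part of the vendored file): knn_intersection_measure reports how much the returned id lists overlap the ground-truth neighbor lists, averaged over queries (1.0 = perfect). A naive re-implementation sketch of the same idea, for intuition only; faiss.contrib.evaluation remains the reference:

import numpy as np

def knn_intersection_naive(I, I_gt):
    # average fraction of the k returned ids per query that also appear
    # among the first k ground-truth ids
    nq, k = I.shape
    return np.mean([len(set(I[i]) & set(I_gt[i, :k])) / k for i in range(nq)])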