Initial commit

yichuan520030910320
2025-06-30 09:05:05 +00:00
commit 46f6cc100b
1231 changed files with 278432 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
# Benchmark of IVF variants
This is a benchmark of IVF index variants, comparing the trade-offs between compression, speed and accuracy.
The results are reported in [this wiki chapter](https://github.com/facebookresearch/faiss/wiki/Indexing-1G-vectors).
The code is organized as follows:
- `datasets.py`: code to access the datafiles, compute the ground-truth and report accuracies
- `bench_all_ivf.py`: evaluate one type of inverted file
- `run_on_cluster_generic.bash`: call `bench_all_ivf.py` for all tested types of indices.
Since the number of experiments is quite large, the script is structured so that the benchmark can be run on a cluster.
- `parse_bench_all_ivf.py`: make nice tradeoff plots from all the results.
The code depends on Faiss and can use 1 to 8 GPUs to do the k-means clustering for large vocabularies.
It was run in October 2018 for the results in the wiki.
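
A single experiment can also be launched directly. The invocation below is an illustrative sketch based on the options defined in `bench_all_ivf.py` (the dataset and index key are examples, not prescriptions); `run_on_cluster_generic.bash` wraps calls of this form in Slurm jobs:

```bash
# evaluate one IVF variant on deep1M with 32 search threads
python -u bench_all_ivf.py \
    --db deep1M \
    --indexkey "OPQ16_64,IVF1024,PQ16x4fs" \
    --maxtrain 0 \
    --searchthreads 32 \
    --min_test_duration 3 \
    --indexfile deep1M_example.faissindex
```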

View File

@@ -0,0 +1,567 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import argparse
import os
import sys
import time
import json
import faiss
import numpy as np
try:
import datasets_fb as datasets
except ModuleNotFoundError:
import datasets_oss as datasets
sanitize = datasets.sanitize
def unwind_index_ivf(index):
if isinstance(index, faiss.IndexPreTransform):
assert index.chain.size() == 1
vt = index.chain.at(0)
index_ivf, vt2 = unwind_index_ivf(faiss.downcast_index(index.index))
assert vt2 is None
if vt is None:
vt = lambda x: x
else:
vt = faiss.downcast_VectorTransform(vt)
return index_ivf, vt
if hasattr(faiss, "IndexRefine") and isinstance(index, faiss.IndexRefine):
return unwind_index_ivf(faiss.downcast_index(index.base_index))
if isinstance(index, faiss.IndexIVF):
return index, None
else:
return None, None
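# Usage sketch (not executed here): for an index built with
# faiss.index_factory(d, "OPQ16_64,IVF1024,PQ16x4fs"), unwind_index_ivf returns
# the inner IndexIVF together with the (downcast) OPQ transform; for an
# IndexRefine it recurses into the base index; for a non-IVF index such as
# "HNSW32" it returns (None, None).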
def apply_AQ_options(index, args):
# if not(
# isinstance(index, faiss.IndexAdditiveQuantize) or
# isinstance(index, faiss.IndexIVFAdditiveQuantizer)):
# return
if args.RQ_train_default:
print("set default training for RQ")
index.rq.train_type  # check that the attribute exists
index.rq.train_type = faiss.ResidualQuantizer.Train_default
if args.RQ_beam_size != -1:
print("set RQ beam size to", args.RQ_beam_size)
index.rq.max_beam_size  # check that the attribute exists
index.rq.max_beam_size = args.RQ_beam_size
if args.LSQ_encode_ils_iters != -1:
print("set LSQ ils iterations to", args.LSQ_encode_ils_iters)
index.lsq.encode_ils_iters  # check that the attribute exists
index.lsq.encode_ils_iters = args.LSQ_encode_ils_iters
if args.RQ_use_beam_LUT != -1:
print("set RQ beam LUT to", args.RQ_use_beam_LUT)
index.rq.use_beam_LUT  # check that the attribute exists
index.rq.use_beam_LUT = args.RQ_use_beam_LUT
def eval_setting(index, xq, gt, k, inter, min_time):
""" evaluate searching in terms of precision vs. speed """
nq = xq.shape[0]
ivf_stats = faiss.cvar.indexIVF_stats
ivf_stats.reset()
nrun = 0
t0 = time.time()
while True:
D, I = index.search(xq, k)
nrun += 1
t1 = time.time()
if t1 - t0 > min_time:
break
ms_per_query = ((t1 - t0) * 1000.0 / nq / nrun)
res = {
"ms_per_query": ms_per_query,
"nrun": nrun
}
res["n"] = ms_per_query
if inter:
rank = k
inter_measure = faiss.eval_intersection(gt[:, :rank], I[:, :rank]) / (nq * rank)
print("%.4f" % inter_measure, end=' ')
res["inter_measure"] = inter_measure
else:
res["recalls"] = {}
for rank in 1, 10, 100:
recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
print("%.4f" % recall, end=' ')
res["recalls"][rank] = recall
print(" %9.5f " % ms_per_query, end=' ')
print("%12d " % (ivf_stats.ndis / nrun), end=' ')
print(nrun)
res["ndis"] = ivf_stats.ndis / nrun
return res
######################################################
# Training
######################################################
def run_train(args, ds, res):
nq, d = ds.nq, ds.d
nb, d = ds.nb, ds.d
print("build index, key=", args.indexkey)
index = faiss.index_factory(
d, args.indexkey, faiss.METRIC_L2 if ds.metric == "L2" else
faiss.METRIC_INNER_PRODUCT
)
index_ivf, vec_transform = unwind_index_ivf(index)
if args.by_residual != -1:
by_residual = args.by_residual == 1
print("setting by_residual = ", by_residual)
index_ivf.by_residual # check if field exists
index_ivf.by_residual = by_residual
if index_ivf:
print("Update add-time parameters")
# adjust default parameters used at add time for quantizers
# because otherwise the assignment is inaccurate
quantizer = faiss.downcast_index(index_ivf.quantizer)
if isinstance(quantizer, faiss.IndexRefine):
print(" update quantizer k_factor=", quantizer.k_factor, end=" -> ")
quantizer.k_factor = 32 if index_ivf.nlist < 1e6 else 64
print(quantizer.k_factor)
base_index = faiss.downcast_index(quantizer.base_index)
if isinstance(base_index, faiss.IndexIVF):
print(" update quantizer nprobe=", base_index.nprobe, end=" -> ")
base_index.nprobe = (
16 if base_index.nlist < 1e5 else
32 if base_index.nlist < 4e6 else
64)
print(base_index.nprobe)
elif isinstance(quantizer, faiss.IndexHNSW):
hnsw = quantizer.hnsw
print(
f" update HNSW quantizer options, before: "
f"{hnsw.efSearch=:} {hnsw.efConstruction=:}"
)
hnsw.efSearch = 40 if index_ivf.nlist < 4e6 else 64
hnsw.efConstruction = 200
print(f" after: {hnsw.efSearch=:} {hnsw.efConstruction=:}")
apply_AQ_options(index_ivf or index, args)
if index_ivf:
index_ivf.verbose = True
index_ivf.quantizer.verbose = True
index_ivf.cp.verbose = True
else:
index.verbose = True
maxtrain = args.maxtrain
if maxtrain == 0:
if 'IMI' in args.indexkey:
maxtrain = int(256 * 2 ** (np.log2(index_ivf.nlist) / 2))  # i.e. 256 * sqrt(nlist)
elif index_ivf:
maxtrain = 50 * index_ivf.nlist
else:
# just guess...
maxtrain = 256 * 100
maxtrain = max(maxtrain, 256 * 100)
print("setting maxtrain to %d" % maxtrain)
try:
xt2 = ds.get_train(maxtrain=maxtrain)
except NotImplementedError:
print("No training set: training on database")
xt2 = ds.get_database()[:maxtrain]
print("train, size", xt2.shape)
assert np.all(np.isfinite(xt2))
if (isinstance(vec_transform, faiss.OPQMatrix) and
isinstance(index_ivf, faiss.IndexIVFPQFastScan)):
print(" Forcing OPQ training PQ to PQ4")
ref_pq = index_ivf.pq
training_pq = faiss.ProductQuantizer(
ref_pq.d, ref_pq.M, ref_pq.nbits
)
vec_transform.pq  # check that the attribute exists
vec_transform.pq = training_pq
if args.get_centroids_from == '':
if args.clustering_niter >= 0:
print(("setting nb of clustering iterations to %d" %
args.clustering_niter))
index_ivf.cp.niter = args.clustering_niter
if args.train_on_gpu:
print("add a training index on GPU")
train_index = faiss.index_cpu_to_all_gpus(
faiss.IndexFlatL2(index_ivf.d))
index_ivf.clustering_index = train_index
else:
print("Getting centroids from", args.get_centroids_from)
src_index = faiss.read_index(args.get_centroids_from)
src_quant = faiss.downcast_index(src_index.quantizer)
centroids = src_quant.reconstruct_n()
print(" centroid table shape", centroids.shape)
if isinstance(vec_transform, faiss.VectorTransform):
print(" training vector transform")
vec_transform.train(xt2)
print(" transform centroids")
centroids = vec_transform.apply_py(centroids)
if not index_ivf.quantizer.is_trained:
print(" training quantizer")
index_ivf.quantizer.train(centroids)
print(" add centroids to quantizer")
index_ivf.quantizer.add(centroids)
del src_index
t0 = time.time()
index.train(xt2)
res.train_time = time.time() - t0
print(" train in %.3f s" % res.train_time)
return index
######################################################
# Populating index
######################################################
def run_add(args, ds, index, res):
print("adding")
t0 = time.time()
if args.add_bs == -1:
assert args.split == [1, 0], "split not supported with full batch add"
index.add(sanitize(ds.get_database()))
else:
totn = ds.nb // args.split[0] # approximate
i0 = 0
print(f"Adding in block sizes {args.add_bs} with split {args.split}")
for xblock in ds.database_iterator(bs=args.add_bs, split=args.split):
i1 = i0 + len(xblock)
print(" adding %d:%d / %d [%.3f s, RSS %d kiB] " % (
i0, i1, totn, time.time() - t0,
faiss.get_mem_usage_kb()))
index.add(xblock)
i0 = i1
res.t_add = time.time() - t0
print(f" add in {res.t_add:.3f} s index size {index.ntotal}")
######################################################
# Search
######################################################
def run_search(args, ds, index, res):
index_ivf, vec_transform = unwind_index_ivf(index)
if args.no_precomputed_tables:
if isinstance(index_ivf, faiss.IndexIVFPQ):
print("disabling precomputed table")
index_ivf.use_precomputed_table = -1
index_ivf.precomputed_table.clear()
if args.indexfile:
print("index size on disk: ", os.stat(args.indexfile).st_size)
if hasattr(index, "code_size"):
print("vector code_size", index.code_size)
if hasattr(index_ivf, "code_size"):
print("vector code_size (IVF)", index_ivf.code_size)
print("current RSS:", faiss.get_mem_usage_kb() * 1024)
precomputed_table_size = 0
if hasattr(index_ivf, 'precomputed_table'):
precomputed_table_size = index_ivf.precomputed_table.size() * 4
print("precomputed tables size:", precomputed_table_size)
# Index is ready
xq = sanitize(ds.get_queries())
nq, d = xq.shape
gt = ds.get_groundtruth(k=args.k)
if not args.accept_short_gt: # Deep1B has only a single NN per query
assert gt.shape[1] == args.k
if args.searchthreads != -1:
print("Setting nb of threads to", args.searchthreads)
faiss.omp_set_num_threads(args.searchthreads)
else:
print("nb search threads: ", faiss.omp_get_max_threads())
ps = faiss.ParameterSpace()
ps.initialize(index)
parametersets = args.searchparams
if args.inter:
header = (
'%-40s inter@%3d time(ms/q) nb distances #runs' %
("parameters", args.k)
)
else:
header = (
'%-40s R@1 R@10 R@100 time(ms/q) nb distances #runs' %
"parameters"
)
res.search_results = {}
if parametersets == ['autotune']:
ps.n_experiments = args.n_autotune
ps.min_test_duration = args.min_test_duration
for kv in args.autotune_max:
k, vmax = kv.split(':')
vmax = float(vmax)
print("limiting %s to %g" % (k, vmax))
pr = ps.add_range(k)
values = faiss.vector_to_array(pr.values)
values = np.array([v for v in values if v < vmax])
faiss.copy_array_to_vector(values, pr.values)
for kv in args.autotune_range:
k, vals = kv.split(':')
vals = np.fromstring(vals, sep=',')
print("setting %s to %s" % (k, vals))
pr = ps.add_range(k)
faiss.copy_array_to_vector(vals, pr.values)
# setup the Criterion object
if args.inter:
print("Optimize for intersection @ ", args.k)
crit = faiss.IntersectionCriterion(nq, args.k)
else:
print("Optimize for 1-recall @ 1")
crit = faiss.OneRecallAtRCriterion(nq, 1)
# by default, the criterion will request only 1 NN
crit.nnn = args.k
crit.set_groundtruth(None, gt.astype('int64'))
# then we let Faiss find the optimal parameters by itself
print("exploring operating points, %d threads" % faiss.omp_get_max_threads());
ps.display()
t0 = time.time()
op = ps.explore(index, xq, crit)
res.t_explore = time.time() - t0
print("Done in %.3f s, available OPs:" % res.t_explore)
op.display()
print("Re-running evaluation on selected OPs")
print(header)
opv = op.optimal_pts
maxw = max(max(len(opv.at(i).key) for i in range(opv.size())), 40)
for i in range(opv.size()):
opt = opv.at(i)
ps.set_index_parameters(index, opt.key)
print(opt.key.ljust(maxw), end=' ')
sys.stdout.flush()
res_i = eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration)
res.search_results[opt.key] = res_i
else:
print(header)
for param in parametersets:
print("%-40s " % param, end=' ')
sys.stdout.flush()
ps.set_index_parameters(index, param)
res_i = eval_setting(index, xq, gt, args.k, args.inter, args.min_test_duration)
res.search_results[param] = res_i
######################################################
# Driver function
######################################################
def main():
parser = argparse.ArgumentParser()
def aa(*args, **kwargs):
group.add_argument(*args, **kwargs)
group = parser.add_argument_group('general options')
aa('--nthreads', default=-1, type=int,
help='nb of threads to use at train and add time')
aa('--json', default=False, action="store_true",
help="output stats in JSON format at the end")
aa('--todo', default=["check_files"],
choices=["train", "add", "search", "check_files"],
nargs="+", help='what to do (check_files means decide depending on which index files exist)')
group = parser.add_argument_group('dataset options')
aa('--db', default='deep1M', help='dataset')
aa('--compute_gt', default=False, action='store_true',
help='compute and store the groundtruth')
aa('--force_IP', default=False, action="store_true",
help='force IP search instead of L2')
aa('--accept_short_gt', default=False, action='store_true',
help='work around a problem with Deep1B GT')
group = parser.add_argument_group('index construction')
aa('--indexkey', default='HNSW32', help='index_factory type')
aa('--trained_indexfile', default='',
help='file to read or write a trained index from')
aa('--maxtrain', default=256 * 256, type=int,
help='maximum number of training points (0 to set automatically)')
aa('--indexfile', default='', help='file to read or write index from')
aa('--split', default=[1, 0], type=int, nargs=2, help="database split")
aa('--add_bs', default=-1, type=int,
help='add elements index by batches of this size')
group = parser.add_argument_group('IVF options')
aa('--by_residual', default=-1, type=int,
help="set if index should use residuals (default=unchanged)")
aa('--no_precomputed_tables', action='store_true', default=False,
help='disable precomputed tables (uses less memory)')
aa('--get_centroids_from', default='',
help='get the centroids from this index (to speed up training)')
aa('--clustering_niter', default=-1, type=int,
help='number of clustering iterations (-1 = leave default)')
aa('--train_on_gpu', default=False, action='store_true',
help='do training on GPU')
group = parser.add_argument_group('index-specific options')
aa('--M0', default=-1, type=int, help='size of base level for HNSW')
aa('--RQ_train_default', default=False, action="store_true",
help='disable progressive dim training for RQ')
aa('--RQ_beam_size', default=-1, type=int,
help='set beam size at add time')
aa('--LSQ_encode_ils_iters', default=-1, type=int,
help='ILS iterations for LSQ')
aa('--RQ_use_beam_LUT', default=-1, type=int,
help='use beam LUT at add time')
group = parser.add_argument_group('searching')
aa('--k', default=100, type=int, help='nb of nearest neighbors')
aa('--inter', default=False, action='store_true',
help='use intersection measure instead of 1-recall as metric')
aa('--searchthreads', default=-1, type=int,
help='nb of threads to use at search time')
aa('--searchparams', nargs='+', default=['autotune'],
help="search parameters to use (can be autotune or a list of params)")
aa('--n_autotune', default=500, type=int,
help="max nb of autotune experiments")
aa('--autotune_max', default=[], nargs='*',
help='set max value for autotune variables format "var:val" (exclusive)')
aa('--autotune_range', default=[], nargs='*',
help='set complete autotune range, format "var:val1,val2,..."')
aa('--min_test_duration', default=3.0, type=float,
help='run test at least for so long to avoid jitter')
aa('--indexes_to_merge', default=[], nargs="*",
help="load these indexes to search and merge them before searching")
args = parser.parse_args()
if args.todo == ["check_files"]:
if os.path.exists(args.indexfile):
args.todo = ["search"]
elif os.path.exists(args.trained_indexfile):
args.todo = ["add", "search"]
else:
args.todo = ["train", "add", "search"]
print("setting todo to", args.todo)
print("args:", args)
os.system('echo -n "nb processors "; '
'cat /proc/cpuinfo | grep ^processor | wc -l; '
'cat /proc/cpuinfo | grep ^"model name" | tail -1')
# object to collect results
res = argparse.Namespace()
res.args = args.__dict__
res.cpu_model = [
l for l in open("/proc/cpuinfo", "r")
if "model name" in l][0]
print("Load dataset")
ds = datasets.load_dataset(
dataset=args.db, compute_gt=args.compute_gt)
if args.force_IP:
ds.metric = "IP"
print(ds)
if args.nthreads != -1:
print("Set nb of threads to", args.nthreads)
faiss.omp_set_num_threads(args.nthreads)
else:
print("nb threads: ", faiss.omp_get_max_threads())
index = None
if "train" in args.todo:
print("================== Training index")
index = run_train(args, ds, res)
if args.trained_indexfile:
print("storing trained index", args.trained_indexfile)
faiss.write_index(index, args.trained_indexfile)
if "add" in args.todo:
if not index:
assert args.trained_indexfile
print("reading trained index", args.trained_indexfile)
index = faiss.read_index(args.trained_indexfile)
print("================== Adding vectors to index")
run_add(args, ds, index, res)
if args.indexfile:
print("storing", args.indexfile)
faiss.write_index(index, args.indexfile)
if "search" in args.todo:
if not index:
if args.indexfile:
print("reading index", args.indexfile)
index = faiss.read_index(args.indexfile)
elif args.indexes_to_merge:
print(f"Merging {len(args.indexes_to_merge)} indexes")
sz = 0
for fname in args.indexes_to_merge:
print(f" reading {fname} (current size {sz})")
index_i = faiss.read_index(fname)
if index is None:
index = index_i
else:
index.merge_from(index_i, index.ntotal)
sz = index.ntotal
else:
assert False, "provide --indexfile"
print("================== Searching")
run_search(args, ds, index, res)
if args.json:
print("JSON results:", json.dumps(res.__dict__))
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,116 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import numpy as np
import faiss
import argparse
import datasets
from datasets import sanitize
######################################################
# Command-line parsing
######################################################
parser = argparse.ArgumentParser()
def aa(*args, **kwargs):
group.add_argument(*args, **kwargs)
group = parser.add_argument_group('dataset options')
aa('--db', default='deep1M', help='dataset')
aa('--nt', default=65536, type=int)
aa('--nb', default=100000, type=int)
aa('--nt_sample', default=0, type=int)
group = parser.add_argument_group('kmeans options')
aa('--k', default=256, type=int)
aa('--seed', default=12345, type=int)
aa('--pcadim', default=-1, type=int, help='PCA to this dimension')
aa('--niter', default=25, type=int)
aa('--eval_freq', default=100, type=int)
args = parser.parse_args()
print("args:", args)
os.system('echo -n "nb processors "; '
'cat /proc/cpuinfo | grep ^processor | wc -l; '
'cat /proc/cpuinfo | grep ^"model name" | tail -1')
ngpu = faiss.get_num_gpus()
print("nb GPUs:", ngpu)
######################################################
# Load dataset
######################################################
xt, xb, xq, gt = datasets.load_data(dataset=args.db)
if args.nt_sample == 0:
xt_pca = xt[args.nt:args.nt + 10000]
xt = xt[:args.nt]
else:
xt_pca = xt[args.nt_sample:args.nt_sample + 10000]
rs = np.random.RandomState(args.seed)
idx = rs.choice(args.nt_sample, size=args.nt, replace=False)
xt = xt[idx]
xb = xb[:args.nb]
d = xb.shape[1]
if args.pcadim != -1:
print("training PCA: %d -> %d" % (d, args.pcadim))
pca = faiss.PCAMatrix(d, args.pcadim)
pca.train(sanitize(xt_pca))
xt = pca.apply_py(sanitize(xt))
xb = pca.apply_py(sanitize(xb))
d = xb.shape[1]
######################################################
# Run clustering
######################################################
index = faiss.IndexFlatL2(d)
if ngpu > 0:
print("moving index to GPU")
index = faiss.index_cpu_to_all_gpus(index)
clustering = faiss.Clustering(d, args.k)
clustering.verbose = True
clustering.seed = args.seed
clustering.max_points_per_centroid = 10**6
clustering.min_points_per_centroid = 1
centroids = None
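# Run k-means in chunks of eval_freq iterations: each chunk warm-starts from
# the centroids of the previous one (copied back into the Clustering object),
# and after each chunk the current centroids are used to quantize xb and
# report the squared quantization error.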
for iter0 in range(0, args.niter, args.eval_freq):
iter1 = min(args.niter, iter0 + args.eval_freq)
clustering.niter = iter1 - iter0
if iter0 > 0:
faiss.copy_array_to_vector(centroids.ravel(), clustering.centroids)
clustering.train(sanitize(xt), index)
index.reset()
centroids = faiss.vector_to_array(clustering.centroids).reshape(args.k, d)
index.add(centroids)
_, I = index.search(sanitize(xb), 1)
error = ((xb - centroids[I.ravel()]) ** 2).sum()
print("iter1=%d quantization error on test: %.4f" % (iter1, error))

View File

@@ -0,0 +1,307 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import time
import sys
import os
import argparse
import numpy as np
def eval_recalls(name, I, gt, times):
k = I.shape[1]
s = "%-40s recall" % name
nq = len(gt)
for rank in 1, 10, 100, 1000:
if rank > k:
break
recall = (I[:, :rank] == gt[:, :1]).sum() / nq
s += "@%d: %.4f " % (rank, recall)
s += "time: %.4f s (± %.4f)" % (np.mean(times), np.std(times))
print(s)
def eval_inters(name, I, gt, times):
k = I.shape[1]
s = "%-40s inter" % name
nq = len(gt)
for rank in 1, 10, 100, 1000:
if rank > k:
break
ninter = 0
for i in range(nq):
ninter += np.intersect1d(I[i, :rank], gt[i, :rank]).size
inter = ninter / (nq * rank)
s += "@%d: %.4f " % (rank, inter)
s += "time: %.4f s (± %.4f)" % (np.mean(times), np.std(times))
print(s)
def main():
parser = argparse.ArgumentParser()
def aa(*args, **kwargs):
group.add_argument(*args, **kwargs)
group = parser.add_argument_group('dataset options')
aa('--db', default='deep1M', help='dataset')
aa('--measure', default="1-recall",
help="perf measure to use: 1-recall or inter")
aa('--download', default=False, action="store_true")
aa('--lib', default='faiss', help='library to use (faiss or scann)')
aa('--thenscann', default=False, action="store_true")
aa('--base_dir', default='/checkpoint/matthijs/faiss_improvements/cmp_ivf_scan_2')
group = parser.add_argument_group('searching')
aa('--k', default=10, type=int, help='nb of nearest neighbors')
aa('--pre_reorder_k', default="0,10,100,1000", help='values for reorder_k')
aa('--nprobe', default="1,2,5,10,20,50,100,200", help='values for nprobe')
aa('--nrun', default=5, type=int, help='nb of runs to perform')
args = parser.parse_args()
print("args:", args)
pre_reorder_k_tab = [int(x) for x in args.pre_reorder_k.split(',')]
nprobe_tab = [int(x) for x in args.nprobe.split(',')]
os.system('echo -n "nb processors "; '
'cat /proc/cpuinfo | grep ^processor | wc -l; '
'cat /proc/cpuinfo | grep ^"model name" | tail -1')
cache_dir = args.base_dir + "/" + args.db + "/"
k = args.k
nrun = args.nrun
if not os.path.exists(cache_dir + "xb.npy"):
# prepare cache
from datasets import load_dataset
ds = load_dataset(args.db, download=args.download)
print(ds)
# store for SCANN
os.system(f"rm -rf {cache_dir}; mkdir -p {cache_dir}")
tosave = dict(
xb = ds.get_database(),
xq = ds.get_queries(),
gt = ds.get_groundtruth()
)
for name, v in tosave.items():
fname = cache_dir + "/" + name + ".npy"
print("save", fname)
np.save(fname, v)
open(cache_dir + "metric", "w").write(ds.metric)
dataset = {}
for kn in "xb xq gt".split():
fname = cache_dir + "/" + kn + ".npy"
print("load", fname)
dataset[kn] = np.load(fname)
xb = dataset["xb"]
xq = dataset["xq"]
gt = dataset["gt"]
distance_measure = open(cache_dir + "metric").read()
if args.lib == "faiss":
import faiss
name1_to_metric = {
"IP": faiss.METRIC_INNER_PRODUCT,
"L2": faiss.METRIC_L2
}
index_fname = cache_dir + "index.faiss"
if not os.path.exists(index_fname):
index = faiss_make_index(
xb, name1_to_metric[distance_measure], index_fname)
else:
index = faiss.read_index(index_fname)
faiss_eval_search(
index, xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt,
nrun, args.measure
)
if args.lib == "scann":
from scann.scann_ops.py import scann_ops_pybind
name1_to_name2 = {
"IP": "dot_product",
"L2": "squared_l2"
}
scann_dir = cache_dir + "/scann1.1.1_serialized"
if os.path.exists(scann_dir + "/scann_config.pb"):
searcher = scann_ops_pybind.load_searcher(scann_dir)
else:
searcher = scann_make_index(xb, name1_to_name2[distance_measure], scann_dir, 0)
scann_dir = cache_dir + "/scann1.1.1_serialized_reorder"
if os.path.exists(scann_dir + "/scann_config.pb"):
searcher_reo = scann_ops_pybind.load_searcher(scann_dir)
else:
searcher_reo = scann_make_index(xb, name1_to_name2[distance_measure], scann_dir, 100)
scann_eval_search(
searcher, searcher_reo,
xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt,
nrun, args.measure
)
if args.lib != "scann" and args.thenscann:
# just append --lib scann, that will override the previous cmdline
# options
cmdline = " ".join(sys.argv) + " --lib scann"
cmdline = (
". ~/anaconda3/etc/profile.d/conda.sh ; " +
"conda activate scann_1.1.1; "
"python -u " + cmdline)
print("running", cmdline)
os.system(cmdline)
###############################################################
# SCANN
###############################################################
def scann_make_index(xb, distance_measure, scann_dir, reorder_k):
import scann
print("build index")
if distance_measure == "dot_product":
thr = 0.2
else:
thr = 0
k = 10
sb = scann.scann_ops_pybind.builder(xb, k, distance_measure)
sb = sb.tree(num_leaves=2000, num_leaves_to_search=100, training_sample_size=250000)
sb = sb.score_ah(2, anisotropic_quantization_threshold=thr)
if reorder_k > 0:
sb = sb.reorder(reorder_k)
searcher = sb.build()
print("done")
print("write index to", scann_dir)
os.system(f"rm -rf {scann_dir}; mkdir -p {scann_dir}")
# os.mkdir(scann_dir)
searcher.serialize(scann_dir)
return searcher
def scann_eval_search(
searcher, searcher_reo,
xq, xb, nprobe_tab, pre_reorder_k_tab, k, gt,
nrun, measure):
# warmup
for _run in range(5):
searcher.search_batched(xq)
for nprobe in nprobe_tab:
for pre_reorder_k in pre_reorder_k_tab:
times = []
for _run in range(nrun):
if pre_reorder_k == 0:
t0 = time.time()
I, D = searcher.search_batched(
xq, leaves_to_search=nprobe, final_num_neighbors=k
)
t1 = time.time()
else:
t0 = time.time()
I, D = searcher_reo.search_batched(
xq, leaves_to_search=nprobe, final_num_neighbors=k,
pre_reorder_num_neighbors=pre_reorder_k
)
t1 = time.time()
times.append(t1 - t0)
header = "SCANN nprobe=%4d reo=%4d" % (nprobe, pre_reorder_k)
if measure == "1-recall":
eval_recalls(header, I, gt, times)
else:
eval_inters(header, I, gt, times)
###############################################################
# Faiss
###############################################################
def faiss_make_index(xb, metric_type, fname):
import faiss
d = xb.shape[1]
M = d // 2
index = faiss.index_factory(d, f"IVF2000,PQ{M}x4fs", metric_type)
# if not by_residual:
# print("setting no residual")
# index.by_residual = False
print("train")
index.train(xb[:250000])
print("add")
index.add(xb)
print("write index", fname)
faiss.write_index(index, fname)
return index
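# Note: "IVF2000,PQ{M}x4fs" with M = d // 2 builds an IVF index with 2000
# inverted lists and a 4-bit "fast scan" PQ using M sub-quantizers of 2
# dimensions each, which roughly mirrors the SCANN setup below
# (2000 leaves, score_ah with 2 dims per block).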
def faiss_eval_search(
index, xq, xb, nprobe_tab, pre_reorder_k_tab,
k, gt, nrun, measure
):
import faiss
print("use precomputed table=", index.use_precomputed_table,
"by residual=", index.by_residual)
print("adding a refine index")
index_refine = faiss.IndexRefineFlat(index, faiss.swig_ptr(xb))
print("set single thread")
faiss.omp_set_num_threads(1)
print("warmup")
for _run in range(5):
index.search(xq, k)
print("run timing")
for nprobe in nprobe_tab:
for pre_reorder_k in pre_reorder_k_tab:
index.nprobe = nprobe
times = []
for _run in range(nrun):
if pre_reorder_k == 0:
t0 = time.time()
D, I = index.search(xq, k)
t1 = time.time()
else:
index_refine.k_factor = pre_reorder_k / k
t0 = time.time()
D, I = index_refine.search(xq, k)
t1 = time.time()
times.append(t1 - t0)
header = "Faiss nprobe=%4d reo=%4d" % (nprobe, pre_reorder_k)
if measure == "1-recall":
eval_recalls(header, I, gt, times)
else:
eval_inters(header, I, gt, times)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,136 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
"""
Common functions to load datasets and compute their ground-truth
"""
import time
import numpy as np
import faiss
from faiss.contrib import datasets as faiss_datasets
print("path:", faiss_datasets.__file__)
faiss_datasets.dataset_basedir = '/checkpoint/matthijs/simsearch/'
def sanitize(x):
return np.ascontiguousarray(x, dtype='float32')
#################################################################
# Dataset
#################################################################
class DatasetCentroids(faiss_datasets.Dataset):
def __init__(self, ds, indexfile):
self.d = ds.d
self.metric = ds.metric
self.nq = ds.nq
self.xq = ds.get_queries()
# get the xb set
src_index = faiss.read_index(indexfile)
src_quant = faiss.downcast_index(src_index.quantizer)
centroids = faiss.vector_to_array(src_quant.xb)
self.xb = centroids.reshape(-1, self.d)
self.nb = self.nt = len(self.xb)
def get_queries(self):
return self.xq
def get_database(self):
return self.xb
def get_train(self, maxtrain=None):
return self.xb
def get_groundtruth(self, k=100):
return faiss.knn(
self.xq, self.xb, k,
faiss.METRIC_L2 if self.metric == 'L2' else faiss.METRIC_INNER_PRODUCT
)[1]
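# Note: DatasetCentroids is what the "deep_centroids_<ncent>" datasets below
# resolve to: the database (and training set) is the set of coarse-quantizer
# centroids reconstructed from a pre-trained IVF index, so that indexes over
# the centroids themselves can be benchmarked (see the coarse-quantizer
# experiments in run_on_cluster_generic.bash).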
def load_dataset(dataset='deep1M', compute_gt=False, download=False):
print("load data", dataset)
if dataset == 'sift1M':
return faiss_datasets.DatasetSIFT1M()
elif dataset.startswith('bigann'):
dbsize = 1000 if dataset == "bigann1B" else int(dataset[6:-1])
return faiss_datasets.DatasetBigANN(nb_M=dbsize)
elif dataset.startswith("deep_centroids_"):
ncent = int(dataset[len("deep_centroids_"):])
centdir = "/checkpoint/matthijs/bench_all_ivf/precomputed_clusters"
return DatasetCentroids(
faiss_datasets.DatasetDeep1B(nb=1000000),
f"{centdir}/clustering.dbdeep1M.IVF{ncent}.faissindex"
)
elif dataset.startswith("deep"):
szsuf = dataset[4:]
if szsuf[-1] == 'M':
dbsize = 10 ** 6 * int(szsuf[:-1])
elif szsuf == '1B':
dbsize = 10 ** 9
elif szsuf[-1] == 'k':
dbsize = 1000 * int(szsuf[:-1])
else:
assert False, "did not recognize suffix " + szsuf
return faiss_datasets.DatasetDeep1B(nb=dbsize)
elif dataset == "music-100":
return faiss_datasets.DatasetMusic100()
elif dataset == "glove":
return faiss_datasets.DatasetGlove(download=download)
else:
assert False
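# Recognized dataset names include: sift1M, bigann<N>M / bigann1B,
# deep<N>k / deep<N>M / deep1B, deep_centroids_<ncent>, music-100 and glove.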
#################################################################
# Evaluation
#################################################################
def evaluate_DI(D, I, gt):
nq = gt.shape[0]
k = I.shape[1]
rank = 1
while rank <= k:
recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
print("R@%d: %.4f" % (rank, recall), end=' ')
rank *= 10
def evaluate(xq, gt, index, k=100, endl=True):
t0 = time.time()
D, I = index.search(xq, k)
t1 = time.time()
nq = xq.shape[0]
print("\t %8.4f ms per query, " % (
(t1 - t0) * 1000.0 / nq), end=' ')
rank = 1
while rank <= k:
recall = (I[:, :rank] == gt[:, :1]).sum() / float(nq)
print("R@%d: %.4f" % (rank, recall), end=' ')
rank *= 10
if endl:
print()
return D, I

View File

@@ -0,0 +1,27 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import logging
# https://stackoverflow.com/questions/7016056/python-logging-not-outputting-anything
logging.basicConfig()
logger = logging.getLogger('faiss.contrib.exhaustive_search')
logger.setLevel(logging.INFO)
from faiss.contrib import datasets
from faiss.contrib.exhaustive_search import knn_ground_truth
from faiss.contrib import vecs_io
ds = datasets.DatasetDeep1B(nb=int(1e9))
print("computing GT matches for", ds)
D, I = knn_ground_truth(
ds.get_queries(),
ds.database_iterator(bs=65536),
k=100
)
vecs_io.ivecs_write("/tmp/tt.ivecs", I)

View File

@@ -0,0 +1,502 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import os
import numpy as np
from collections import defaultdict
from matplotlib import pyplot
import re
from argparse import Namespace
from faiss.contrib.factory_tools import get_code_size as unitsize
def dbsize_from_name(dbname):
sufs = {
'1B': 10**9,
'100M': 10**8,
'10M': 10**7,
'1M': 10**6,
}
for s in sufs:
if dbname.endswith(s):
return sufs[s]
else:
assert False
def keep_latest_stdout(fnames):
fnames = [fname for fname in fnames if fname.endswith('.stdout')]
fnames.sort()
n = len(fnames)
fnames2 = []
for i, fname in enumerate(fnames):
if i + 1 < n and fnames[i + 1][:-8] == fname[:-8]:
continue
fnames2.append(fname)
return fnames2
def parse_result_file(fname):
# print fname
st = 0
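# st acts as a small parser state: 0 = scanning header / statistics lines,
# 1 = skip the line that follows the result header, 2 = parse result rows
# (one parameter setting per line: key followed by the measured values).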
res = []
keys = []
stats = {}
stats['run_version'] = fname[-8]
indexkey = None
for l in open(fname):
if l.startswith("srun:"):
# looks like a crash...
if indexkey is None:
raise RuntimeError("instant crash")
break
elif st == 0:
if l.startswith("dataset in dimension"):
fi = l.split()
stats["d"] = int(fi[3][:-1])
stats["nq"] = int(fi[9])
stats["nb"] = int(fi[11])
stats["nt"] = int(fi[13])
if l.startswith('index size on disk:'):
stats['index_size'] = int(l.split()[-1])
if l.startswith('current RSS:'):
stats['RSS'] = int(l.split()[-1])
if l.startswith('precomputed tables size:'):
stats['tables_size'] = int(l.split()[-1])
if l.startswith('Setting nb of threads to'):
stats['n_threads'] = int(l.split()[-1])
if l.startswith(' add in'):
stats['add_time'] = float(l.split()[-2])
if l.startswith("vector code_size"):
stats['code_size'] = float(l.split()[-1])
if l.startswith('args:'):
args = eval(l[l.find(' '):])
indexkey = args.indexkey
elif "time(ms/q)" in l:
# result header
if 'R@1 R@10 R@100' in l:
stats["measure"] = "recall"
stats["ranks"] = [1, 10, 100]
elif 'I@1 I@10 I@100' in l:
stats["measure"] = "inter"
stats["ranks"] = [1, 10, 100]
elif 'inter@' in l:
stats["measure"] = "inter"
fi = l.split()
if fi[1] == "inter@":
rank = int(fi[2])
else:
rank = int(fi[1][len("inter@"):])
stats["ranks"] = [rank]
else:
assert False
st = 1
elif 'index size on disk:' in l:
stats["index_size"] = int(l.split()[-1])
elif st == 1:
st = 2
elif st == 2:
fi = l.split()
if l[0] == " ":
# means there are 0 parameters
fi = [""] + fi
keys.append(fi[0])
res.append([float(x) for x in fi[1:]])
return indexkey, np.array(res), keys, stats
# the directory used in run_on_cluster.bash
basedir = "/checkpoint/matthijs/bench_all_ivf/"
logdir = basedir + 'logs/'
def collect_results_for(db='deep1M', prefix="autotune."):
# run parsing
allres = {}
allstats = {}
missing = []
fnames = keep_latest_stdout(os.listdir(logdir))
# print fnames
# filenames are in the form <key>.x.stdout
# where x is a version number (from a to z)
# keep only latest version of each name
for fname in fnames:
if not (
'db' + db in fname and
fname.startswith(prefix) and
fname.endswith('.stdout')
):
continue
print("parse", fname, end=" ", flush=True)
try:
indexkey, res, _, stats = parse_result_file(logdir + fname)
except RuntimeError as e:
print("FAIL %s" % e)
res = np.zeros((2, 0))
except Exception as e:
print("PARSE ERROR " + e)
res = np.zeros((2, 0))
else:
print(len(res), "results")
if res.size == 0:
missing.append(fname)
else:
if indexkey in allres:
if allstats[indexkey]['run_version'] > stats['run_version']:
# don't use this run
continue
allres[indexkey] = res
allstats[indexkey] = stats
return allres, allstats
def extract_pareto_optimal(allres, keys, recall_idx=0, times_idx=3):
bigtab = []
for i, k in enumerate(keys):
v = allres[k]
perf = v[:, recall_idx]
times = v[:, times_idx]
bigtab.append(
np.vstack((
np.ones(times.size) * i,
perf, times
))
)
if bigtab == []:
return [], np.zeros((3, 0))
bigtab = np.hstack(bigtab)
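# Pareto-frontier extraction: sort all (method, perf, time) points by the perf
# measure, then sweep from the most accurate point downwards while keeping the
# running minimum of search time; a point is optimal iff its time equals that
# running minimum, i.e. no point with at least its accuracy is faster.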
# sort by perf
perm = np.argsort(bigtab[1, :])
bigtab_sorted = bigtab[:, perm]
best_times = np.minimum.accumulate(bigtab_sorted[2, ::-1])[::-1]
selection, = np.where(bigtab_sorted[2, :] == best_times)
selected_keys = [
keys[i] for i in
np.unique(bigtab_sorted[0, selection].astype(int))
]
ops = bigtab_sorted[:, selection]
return selected_keys, ops
def plot_subset(
allres, allstats, selected_methods, recall_idx, times_idx=3,
report=["overhead", "build time"]):
# important methods
for k in selected_methods:
v = allres[k]
stats = allstats[k]
d = stats["d"]
dbsize = stats["nb"]
if "index_size" in stats and "tables_size" in stats:
tot_size = stats['index_size'] + stats['tables_size']
else:
tot_size = -1
id_size = 8 # 64 bit
addt = ''
if 'add_time' in stats:
add_time = stats['add_time']
if add_time > 7200:
add_min = add_time / 60
addt = ', %dh%02d' % (add_min / 60, add_min % 60)
else:
add_sec = int(add_time)
addt = ', %dm%02d' % (add_sec / 60, add_sec % 60)
code_size = unitsize(d, k)
label = k
if "code_size" in report:
label += " %d bytes" % code_size
tight_size = (code_size + id_size) * dbsize
if tot_size < 0 or "overhead" not in report:
pass # don't know what the index size is
elif tot_size > 10 * tight_size:
label += " overhead x%.1f" % (tot_size / tight_size)
else:
label += " overhead+%.1f%%" % (
tot_size / tight_size * 100 - 100)
if "build time" in report:
label += " " + addt
linestyle = (':' if 'Refine' in k or 'RFlat' in k else
'-.' if 'SQ' in k else
'-' if '4fs' in k else
'-')
print(k, linestyle)
pyplot.semilogy(v[:, recall_idx], 1000 / v[:, times_idx], label=label,
linestyle=linestyle,
marker='o' if '4fs' in k else '+')
recall_rank = stats["ranks"][recall_idx]
if stats["measure"] == "recall":
pyplot.xlabel('1-recall at %d' % recall_rank)
elif stats["measure"] == "inter":
pyplot.xlabel('inter @ %d' % recall_rank)
else:
assert False
pyplot.ylabel('QPS (%d threads)' % stats["n_threads"])
def plot_tradeoffs(db, allres, allstats, code_size, recall_rank):
stat0 = next(iter(allstats.values()))
d = stat0["d"]
n_threads = stat0["n_threads"]
recall_idx = stat0["ranks"].index(recall_rank)
# times come after the perf measure
times_idx = len(stat0["ranks"])
if type(code_size) == int:
if code_size == 0:
code_size = [0, 1e50]
code_size_name = "any code size"
else:
code_size_name = "code_size=%d" % code_size
code_size = [code_size, code_size]
elif type(code_size) == tuple:
code_size_name = "code_size in [%d, %d]" % code_size
else:
assert False
names_maxperf = []
for k in sorted(allres):
v = allres[k]
if v.ndim != 2: continue
us = unitsize(d, k)
if not code_size[0] <= us <= code_size[1]: continue
names_maxperf.append((v[-1, recall_idx], k))
# sort from lowest to highest topline accuracy
names_maxperf.sort()
names = [name for mp, name in names_maxperf]
selected_methods, optimal_points = \
extract_pareto_optimal(allres, names, recall_idx, times_idx)
not_selected = list(set(names) - set(selected_methods))
print("methods without an optimal OP: ", not_selected)
pyplot.title('database ' + db + ' ' + code_size_name)
# grayed out lines
for k in not_selected:
v = allres[k]
if v.ndim != 2: continue
us = unitsize(d, k)
if not code_size[0] <= us <= code_size[1]: continue
linestyle = (':' if 'PQ' in k else
'-.' if 'SQ4' in k else
'--' if 'SQ8' in k else '-')
pyplot.semilogy(v[:, recall_idx], 1000 / v[:, times_idx], label=None,
linestyle=linestyle,
marker='o' if 'HNSW' in k else '+',
color='#cccccc', linewidth=0.2)
plot_subset(allres, allstats, selected_methods, recall_idx, times_idx)
if len(not_selected) == 0:
om = ''
else:
om = '\nomitted:'
nc = len(om)
for m in not_selected:
if nc > 80:
om += '\n'
nc = 0
om += ' ' + m
nc += len(m) + 1
# pyplot.semilogy(optimal_points[1, :], optimal_points[2, :], marker="s")
# print(optimal_points[0, :])
pyplot.xlabel('1-recall at %d %s' % (recall_rank, om) )
pyplot.ylabel('QPS (%d threads)' % n_threads)
pyplot.legend()
pyplot.grid()
return selected_methods, not_selected
if __name__ == "__main__xx":
# tests on centroids indexing (v1)
for k in 1, 32, 128:
pyplot.gcf().set_size_inches(15, 10)
i = 1
for ncent in 65536, 262144, 1048576, 4194304:
db = f'deep_centroids_{ncent}.k{k}.'
allres, allstats = collect_results_for(
db=db, prefix="cent_index.")
pyplot.subplot(2, 2, i)
plot_subset(
allres, allstats, list(allres.keys()),
recall_idx=0,
times_idx=1,
report=["code_size"]
)
i += 1
pyplot.title(f"{ncent} centroids")
pyplot.legend()
pyplot.xlim([0.95, 1])
pyplot.grid()
pyplot.savefig('figs/deep1B_centroids_k%d.png' % k)
if __name__ == "__main__xx":
# centroids plot per k
pyplot.gcf().set_size_inches(15, 10)
i=1
for ncent in 65536, 262144, 1048576, 4194304:
xyd = defaultdict(list)
for k in 1, 4, 8, 16, 32, 64, 128, 256:
db = f'deep_centroids_{ncent}.k{k}.'
allres, allstats = collect_results_for(db=db, prefix="cent_index.")
for indexkey, res in allres.items():
idx, = np.where(res[:, 0] >= 0.99)
if idx.size > 0:
xyd[indexkey].append((k, 1000 / res[idx[0], 1]))
pyplot.subplot(2, 2, i)
i += 1
for indexkey, xy in xyd.items():
xy = np.array(xy)
pyplot.loglog(xy[:, 0], xy[:, 1], 'o-', label=indexkey)
pyplot.title(f"{ncent} centroids")
pyplot.xlabel("k")
xt = 2**np.arange(9)
pyplot.xticks(xt, ["%d" % x for x in xt])
pyplot.ylabel("QPS (32 threads)")
pyplot.legend()
pyplot.grid()
pyplot.savefig('../plots/deep1B_centroids_min99.png')
if __name__ == "__main__xx":
# main indexing plots
i = 0
for db in 'bigann10M', 'deep10M', 'bigann100M', 'deep100M', 'deep1B', 'bigann1B':
allres, allstats = collect_results_for(
db=db, prefix="autotune.")
for cs in 8, 16, 32, 64:
pyplot.figure(i)
i += 1
pyplot.gcf().set_size_inches(15, 10)
cs_range = (
(0, 8) if cs == 8 else (cs // 2 + 1, cs)
)
plot_tradeoffs(
db, allres, allstats, code_size=cs_range, recall_rank=1)
pyplot.savefig('../plots/tradeoffs_%s_cs%d_r1.png' % (
db, cs))
if __name__ == "__main__":
# 1M indexes
i = 0
for db in "glove", "music-100":
pyplot.figure(i)
pyplot.gcf().set_size_inches(15, 10)
i += 1
allres, allstats = collect_results_for(db=db, prefix="autotune.")
plot_tradeoffs(db, allres, allstats, code_size=0, recall_rank=1)
pyplot.savefig('../plots/1M_tradeoffs_' + db + ".png")
for db in "sift1M", "deep1M":
allres, allstats = collect_results_for(db=db, prefix="autotune.")
pyplot.figure(i)
pyplot.gcf().set_size_inches(15, 10)
i += 1
plot_tradeoffs(db, allres, allstats, code_size=(0, 64), recall_rank=1)
pyplot.savefig('../plots/1M_tradeoffs_' + db + "_small.png")
pyplot.figure(i)
pyplot.gcf().set_size_inches(15, 10)
i += 1
plot_tradeoffs(db, allres, allstats, code_size=(65, 10000), recall_rank=1)
pyplot.savefig('../plots/1M_tradeoffs_' + db + "_large.png")
if __name__ == "__main__xx":
db = 'sift1M'
allres, allstats = collect_results_for(db=db, prefix="autotune.")
pyplot.gcf().set_size_inches(15, 10)
keys = [
"IVF1024,PQ32x8",
"IVF1024,PQ64x4",
"IVF1024,PQ64x4fs",
"IVF1024,PQ64x4fsr",
"IVF1024,SQ4",
"IVF1024,SQ8"
]
plot_subset(allres, allstats, keys, recall_idx=0, report=["code_size"])
pyplot.legend()
pyplot.title(db)
pyplot.xlabel("1-recall@1")
pyplot.ylabel("QPS (32 threads)")
pyplot.grid()
pyplot.savefig('../plots/ivf1024_variants.png')
pyplot.figure(2)
pyplot.gcf().set_size_inches(15, 10)
keys = [
"HNSW32",
"IVF1024,PQ64x4fs",
"IVF1024,PQ64x4fsr",
"IVF1024,PQ64x4fs,RFlat",
"IVF1024,PQ64x4fs,Refine(SQfp16)",
"IVF1024,PQ64x4fs,Refine(SQ8)",
]
plot_subset(allres, allstats, keys, recall_idx=0, report=["code_size"])
pyplot.legend()
pyplot.title(db)
pyplot.xlabel("1-recall@1")
pyplot.ylabel("QPS (32 threads)")
pyplot.grid()
pyplot.savefig('../plots/ivf1024_rerank.png')

View File

@@ -0,0 +1,603 @@
set -e
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# @nolint
# This script launches the experiments on a cluster
# It assumes two shell functions are defined:
#
# run_on_1machine: runs a command on one (full) machine on a cluster
#
# run_on_8gpu: runs a command on one machine with 8 GPUs
#
# the two functions are called as:
#
# run_on_1machine <name> <command>
#
# the stdout of the command should be stored in $logdir/<name>.stdout
function run_on ()
{
sys="$1"
shift
name="$1"
shift
script="$logdir/$name.sh"
if [ -e "$script" ]; then
echo script "$script" exists
return
fi
# srun handles special characters fine, but the shell interpreter
# does not
escaped_cmd=$( printf "%q " "$@" )
cat > $script <<EOF
#! /bin/bash
srun $escaped_cmd
EOF
echo -n "$logdir/$name.stdout "
sbatch -n1 -J "$name" \
$sys \
--comment='priority is the only one that works' \
--output="$logdir/$name.stdout" \
"$script"
}
function run_on_1machine {
run_on "--cpus-per-task=80 --gres=gpu:0 --mem=500G --time=70:00:00 --partition=priority" "$@"
}
function run_on_1machine_1h {
run_on "--cpus-per-task=80 --gres=gpu:2 --mem=100G --time=1:00:00 --partition=priority" "$@"
}
function run_on_1machine_3h {
run_on "--cpus-per-task=80 --gres=gpu:2 --mem=100G --time=3:00:00 --partition=priority" "$@"
}
function run_on_4gpu_3h {
run_on "--cpus-per-task=40 --gres=gpu:4 --mem=100G --time=3:00:00 --partition=priority" "$@"
}
function run_on_8gpu () {
run_on "--cpus-per-task=80 --gres=gpu:8 --mem=100G --time=70:00:00 --partition=priority" "$@"
}
# prepare output directories
# set to some directory where all indexes can be written.
basedir=/checkpoint/matthijs/bench_all_ivf
logdir=$basedir/logs
indexdir=$basedir/indexes
centdir=$basedir/precomputed_clusters
mkdir -p $logdir $indexdir
# adds an option to use a pretrained quantizer
function add_precomputed_quantizer () {
local db="$1"
local coarse="$2"
case $db in
bigann*) rname=bigann ;;
deep*) rname=deep ;;
sift1M) return;;
music-100) return ;;
glove) return ;;
*) echo "bad db"; exit 1;;
esac
case $coarse in
IVF65536*)
cname=clustering.db${rname}1M.IVF65536.faissindex
copt="--get_centroids_from $centdir/$cname"
;;
IVF262144*)
cname=clustering.db${rname}1M.IVF262144.faissindex
copt="--get_centroids_from $centdir/$cname"
;;
IVF1048576*)
cname=clustering.db${rname}1M.IVF1048576.faissindex
copt="--get_centroids_from $centdir/$cname"
;;
IVF4194304*)
cname=clustering.db${rname}1M.IVF4194304.faissindex
copt="--get_centroids_from $centdir/$cname"
;;
*)
copt="" ;;
esac
echo $copt
}
function get_db_dim () {
local db="$1"
case $db in
sift1M) dim=128;;
bigann*) dim=128;;
deep*) dim=96;;
music-100) dim=100;;
glove) dim=100;;
*) echo "bad db"; exit 1;;
esac
echo $dim
}
# replace PQHD (HD = half the dataset dimension) in the coarse quantizer key
# with the appropriate PQ size; relies on shell variables being global by default...
function replace_coarse_PQHD () {
local coarse="$1"
local dim=$2
coarseD=${coarse//PQHD/PQ$((dim/2))}
coarse16=${coarse//PQHD/PQ8}
coarse32=${coarse//PQHD/PQ16}
coarse64=${coarse//PQHD/PQ32}
coarse128=${coarse//PQHD/PQ64}
coarse256=${coarse//PQHD/PQ128}
coarse112=${coarse//PQHD/PQ56}
}
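# Example: with dim=96 (deep* datasets) and coarse="IVF65536(IVF256,PQHDx4fs,RFlat)",
# coarseD becomes IVF65536(IVF256,PQ48x4fs,RFlat) and coarse64 becomes
# IVF65536(IVF256,PQ32x4fs,RFlat), so the same coarse key can be reused at
# several target code sizes.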
if false; then
###############################################
# comparison with SCANN
for db in sift1M deep1M glove music-100
do
opt=""
if [ $db == glove ]; then
opt="--measure inter"
fi
run_on_1machine_1h cmp_with_scann.$db.c \
python -u cmp_with_scann.py --db $db \
--lib faiss $opt --thenscann
done
############################### Preliminary SIFT1M experiment
for db in sift1M ; do
for coarse in IVF1024
do
indexkeys="
HNSW32
$coarse,SQfp16
$coarse,SQ4
$coarse,SQ8
$coarse,PQ32x8
$coarse,PQ64x4
$coarse,PQ64x4fs
$coarse,PQ64x4fs,RFlat
$coarse,PQ64x4fs,Refine(SQfp16)
$coarse,PQ64x4fs,Refine(SQ8)
OPQ64,$coarse,PQ64x4fs
OPQ64,$coarse,PQ64x4fs,RFlat
"
indexkeys="
$coarse,PQ64x4fsr
$coarse,PQ64x4fsr,RFlat
"
# OPQ actually degrades the results on SIFT1M, so let's ignore
for indexkey in $indexkeys
do
# escape nasty characters
key="autotune.db$db.${indexkey//,/_}"
key="${key//(/_}"
key="${key//)/_}"
run_on_1machine_1h $key.a \
python -u bench_all_ivf.py \
--db $db \
--indexkey "$indexkey" \
--maxtrain 0 \
--indexfile $indexdir/$key.faissindex \
--searchthreads 32
done
done
done
############################### 1M experiments
fi
# for db in sift1M deep1M music-100 glove; do
for db in glove music-100; do
dim=$( get_db_dim $db )
for coarse in IVF1024 IVF4096_HNSW32
do
replace_coarse_PQHD "$coarse" $dim
indexkeys="
$coarseD,PQ$((dim/2))x4fs
$coarseD,PQ$((dim/2))x4fsr
OPQ8_64,$coarse64,PQ8
PCAR16,$coarse16,SQ4
OPQ16_64,$coarse64,PQ16x4fs
OPQ16_64,$coarse64,PQ16x4fsr
OPQ16_64,$coarse64,PQ16
PCAR16,$coarse16,SQ8
PCAR32,$coarse32,SQ4
OPQ32_64,$coarse64,PQ32x4fs
OPQ32_64,$coarse64,PQ32x4fsr
OPQ32_128,$coarse128,PQ32
PCAR32,$coarse32,SQ8
PCAR64,$coarse64,SQ4
PCAR16,$coarse16,SQfp16
OPQ64_128,$coarse128,PQ64x4fs
OPQ64_128,$coarse128,PQ64x4fsr
OPQ64_128,$coarse128,PQ64
PCAR64,$coarse64,SQ8
PCAR32,$coarse32,SQfp16
PCAR128,$coarse128,SQ4
OPQ128_256,$coarse256,PQ128x4fs
OPQ128_256,$coarse256,PQ128x4fsr
OPQ16_64,$coarse64,PQ16x4fs,Refine(OPQ56_112,PQ56)
OPQ16_64,$coarse64,PQ16x4fs,Refine(PCAR72,SQ6)
OPQ32_64,$coarse64,PQ16x4fs,Refine(PCAR64,SQ6)
OPQ32_64,$coarse64,PQ32x4fs,Refine(OPQ48_96,PQ48)
OPQ64_128,$coarse,PQ64x12
OPQ64_128,$coarse,PQ64x4fs,RFlat
OPQ64_128,$coarse,PQ64x4fs,Refine(SQfp16)
OPQ64_128,$coarse,PQ64x4fs,Refine(SQ8)
OPQ64_128,$coarse,PQ64x4fs,Refine(SQ6)
OPQ64_128,$coarse,PQ64x4fs,Refine(SQ4)
OPQ32_64,$coarse,PQ32x4fs,Refine(SQfp16)
OPQ32_64,$coarse,PQ32x4fs,Refine(SQ8)
OPQ32_64,$coarse,PQ32x4fs,Refine(SQ6)
OPQ32_64,$coarse,PQ32x4fs,Refine(SQ4)
"
indexkeys="
$coarseD,PQ$((dim/2))x4fs
$coarseD,PQ$((dim/2))x4fsr
$coarseD,PQ$((dim/2))x4fsr,RFlat
$coarseD,PQ$((dim/2))x4fsr,Refine(SQfp16)
$coarseD,PQ$((dim/2))x4fsr,Refine(SQ8)
$coarseD,PQ$((dim/4))x4fs
$coarseD,PQ$((dim/4))x4fsr
$coarseD,PQ$((dim/4))x4fsr,RFlat
$coarseD,PQ$((dim/4))x4fsr,Refine(SQfp16)
$coarseD,PQ$((dim/4))x4fsr,Refine(SQ8)
$coarseD,PQ$((dim/2))
$coarseD,PQ$((dim/4))
HNSW32,Flat
"
indexkeys="HNSW32,Flat"
for indexkey in $indexkeys
do
key=autotune.db$db.${indexkey//,/_}
key="${key//(/_}"
key="${key//)/_}"
run_on_1machine_3h $key.q \
python -u bench_all_ivf.py \
--db $db \
--indexkey "$indexkey" \
--maxtrain 0 \
--indexfile "$indexdir/$key.faissindex" \
$( add_precomputed_quantizer $db $coarse ) \
--searchthreads 32 \
--min_test_duration 3
done
done
done
if false; then
############################################
# precompute centroids on GPU for large vocabularies
for db in deep1M bigann1M; do
for ncent in 262144 65536 1048576 4194304; do
key=clustering.db$db.IVF$ncent
run_on_4gpu_3h $key.e \
python -u bench_all_ivf.py \
--db $db \
--indexkey IVF$ncent,SQ8 \
--maxtrain 100000000 \
--indexfile $centdir/$key.faissindex \
--searchthreads 32 \
--min_test_duration 3 \
--add_bs 1000000 \
--train_on_gpu
done
done
###############################
## coarse quantizer experiments on the centroids of deep1B
for k in 4 8 16 64 256; do
for ncent in 65536 262144 1048576 4194304; do
db=deep_centroids_$ncent
# compute square root of ncent...
for(( ls=0; ncent > (1 << (2 * ls)); ls++)); do
echo -n
done
sncent=$(( 1 << ls ))
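# e.g. ncent=65536 -> ls=8 -> sncent=256; ncent=262144 -> ls=9 -> sncent=512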
indexkeys="
IVF$((sncent/2)),PQ48x4fs,RFlat
IVF$((sncent*2)),PQ48x4fs,RFlat
HNSW32
PQ48x4fs
PQ48x4fs,RFlat
IVF$sncent,PQ48x4fs,RFlat
"
for indexkey in $indexkeys; do
key="cent_index.db$db.k$k.$indexkey"
run_on_1machine_1h "$key.b" \
python -u bench_all_ivf.py \
--db $db \
--indexkey "$indexkey" \
--maxtrain 0 \
--inter \
--searchthreads 32 \
--k $k
done
done
done
############################### 10M experiments
for db in deep10M bigann10M; do
coarses="
IVF65536(IVF256,PQHDx4fs,RFlat)
IVF16384_HNSW32
IVF65536_HNSW32
IVF262144_HNSW32
IVF262144(IVF512,PQHDx4fs,RFlat)
"
dim=$( get_db_dim $db )
for coarse in $coarses
do
replace_coarse_PQHD "$coarse" $dim
indexkeys="
$coarseD,PQ$((dim/2))x4fs
OPQ8_64,$coarse64,PQ8
PCAR16,$coarse16,SQ4
OPQ16_64,$coarse64,PQ16x4fs
OPQ16_64,$coarse64,PQ16x4fsr
OPQ16_64,$coarse64,PQ16
PCAR16,$coarse16,SQ8
PCAR32,$coarse32,SQ4
OPQ32_64,$coarse64,PQ32x4fs
OPQ32_64,$coarse64,PQ32x4fsr
OPQ32_128,$coarse128,PQ32
PCAR32,$coarse32,SQ8
PCAR64,$coarse64,SQ4
PCAR16,$coarse16,SQfp16
OPQ64_128,$coarse128,PQ64x4fs
OPQ64_128,$coarse128,PQ64x4fsr
OPQ64_128,$coarse128,PQ64
PCAR64,$coarse64,SQ8
PCAR32,$coarse32,SQfp16
PCAR128,$coarse128,SQ4
OPQ128_256,$coarse256,PQ128x4fs
OPQ128_256,$coarse256,PQ128x4fsr
OPQ56_112,$coarse112,PQ7+56
OPQ16_64,$coarse64,PQ16x4fs,Refine(OPQ56_112,PQ56)
OPQ16_64,$coarse64,PQ16x4fs,Refine(PCAR72,SQ6)
OPQ32_64,$coarse64,PQ16x4fs,Refine(PCAR64,SQ6)
OPQ32_64,$coarse64,PQ32x4fs,Refine(OPQ48_96,PQ48)
"
indexkeys="
OPQ16_64,$coarse64,PQ16x4fsr
OPQ32_64,$coarse64,PQ32x4fsr
OPQ64_128,$coarse128,PQ64x4fsr
OPQ128_256,$coarse256,PQ128x4fsr
"
for indexkey in $indexkeys
do
key=autotune.db$db.${indexkey//,/_}
key="${key//(/_}"
key="${key//)/_}"
run_on_1machine_3h $key.l \
python -u bench_all_ivf.py \
--db $db \
--indexkey "$indexkey" \
--maxtrain 0 \
--indexfile "$indexdir/$key.faissindex" \
$( add_precomputed_quantizer $db $coarse ) \
--searchthreads 32 \
--min_test_duration 3 \
--autotune_max nprobe:2000
done
done
done
############################### 100M experiments
for db in deep100M bigann100M; do
coarses="
IVF65536_HNSW32
IVF262144_HNSW32
IVF262144(IVF512,PQHDx4fs,RFlat)
IVF1048576_HNSW32
IVF1048576(IVF1024,PQHDx4fs,RFlat)
"
dim=$( get_db_dim $db )
for coarse in $coarses
do
replace_coarse_PQHD "$coarse" $dim
indexkeys="
OPQ8_64,$coarse64,PQ8
OPQ16_64,$coarse64,PQ16x4fs
PCAR32,$coarse32,SQ4
OPQ16_64,$coarse64,PQ16
OPQ32_64,$coarse64,PQ32x4fs
OPQ32_128,$coarse128,PQ32
PCAR64,$coarse64,SQ4
PCAR32,$coarse32,SQ8
OPQ64_128,$coarse128,PQ64x4fs
PCAR128,$coarse128,SQ4
OPQ64_128,$coarse128,PQ64
PCAR32,$coarse32,SQfp16
PCAR64,$coarse64,SQ8
OPQ128_256,$coarse256,PQ128x4fs
OPQ56_112,$coarse112,PQ7+56
OPQ16_64,$coarse64,PQ16x4fs,Refine(OPQ56_112,PQ56)
$coarseD,PQ$((dim/2))x4fs
"
indexkeys="
OPQ128_256,$coarse256,PQ128x4fsr
OPQ64_128,$coarse128,PQ64x4fsr
OPQ32_64,$coarse64,PQ32x4fsr
OPQ16_64,$coarse64,PQ16x4fsr
OPQ16_64,$coarse64,PQ16x4fsr,Refine(OPQ56_112,PQ56)
"
for indexkey in $indexkeys
do
key=autotune.db$db.${indexkey//,/_}
key="${key//(/_}"
key="${key//)/_}"
run_on_1machine $key.e \
python -u bench_all_ivf.py \
--db $db \
--indexkey "$indexkey" \
--maxtrain 0 \
--indexfile $indexdir/$key.faissindex \
--searchthreads 32 \
--min_test_duration 3 \
$( add_precomputed_quantizer $db $coarse ) \
--add_bs 1000000 \
--autotune_max nprobe:2000
done
done
done
#################################
# 1B-scale experiment
for db in deep1B bigann1B; do
coarses="
IVF1048576_HNSW32
IVF4194304_HNSW32
IVF4194304(IVF1024,PQHDx4fs,RFlat)
"
dim=$( get_db_dim $db )
for coarse in $coarses; do
replace_coarse_PQHD "$coarse" $dim
indexkeys="
OPQ8_64,$coarse64,PQ8
OPQ16_64,$coarse64,PQ16x4fsr
OPQ16_64,$coarse64,PQ16
OPQ32_64,$coarse64,PQ32x4fsr
OPQ32_128,$coarse128,PQ32
OPQ64_128,$coarse128,PQ64x4fsr
OPQ64_128,$coarse128,PQ64
OPQ128_256,$coarse256,PQ128x4fsr
OPQ56_112,$coarse112,PQ7+56
OPQ16_64,$coarse64,PQ16x4fs,Refine(OPQ56_112,PQ56)
$coarseD,PQ$((dim/2))x4fs
"
for indexkey in $indexkeys
do
key=autotune.db$db.${indexkey//,/_}
key="${key//(/_}"
key="${key//)/_}"
run_on_1machine $key.d \
python -u bench_all_ivf.py \
--db $db \
--indexkey "$indexkey" \
--maxtrain 0 \
--indexfile $indexdir/$key.faissindex \
--searchthreads 32 \
--min_test_duration 3 \
$( add_precomputed_quantizer $db $coarse ) \
--add_bs 1000000 \
--autotune_max nprobe:3000
done
done
done
fi