Initial commit
packages/leann-backend-hnsw/third_party/faiss/contrib/evaluation.py (vendored, new file, 492 lines)
@@ -0,0 +1,492 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import numpy as np
import unittest
import time
import faiss

from multiprocessing.pool import ThreadPool

###############################################################
# Simple functions to evaluate knn results

def knn_intersection_measure(I1, I2):
    """ computes the intersection measure of two result tables
    """
    nq, rank = I1.shape
    assert I2.shape == (nq, rank)
    ninter = sum(
        np.intersect1d(I1[i], I2[i]).size
        for i in range(nq)
    )
    return ninter / I1.size
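
# A minimal usage sketch (the _demo_* helpers in this file are illustrative
# examples with made-up data, not part of the Faiss API): two queries at
# rank 3, sharing 2 of 3 ids per query, so the measure is 4 / 6.
def _demo_knn_intersection_measure():
    I1 = np.array([[0, 1, 2], [10, 11, 12]])
    I2 = np.array([[0, 2, 3], [11, 12, 13]])
    # per-query intersections are {0, 2} and {11, 12}: 4 common ids out of 6
    assert knn_intersection_measure(I1, I2) == 4 / 6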

###############################################################
# Range search results can be compared with Precision-Recall

def filter_range_results(lims, D, I, thresh):
    """ select the subset of results with distance below thresh """
    nq = lims.size - 1
    mask = D < thresh
    new_lims = np.zeros_like(lims)
    for i in range(nq):
        new_lims[i + 1] = new_lims[i] + mask[lims[i] : lims[i + 1]].sum()
    return new_lims, D[mask], I[mask]
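
# Range-search results use a CSR-like layout: query i's ids are
# I[lims[i]:lims[i + 1]], with matching distances in D. A toy sketch,
# keeping only results with distance below 0.5:
def _demo_filter_range_results():
    lims = np.array([0, 2, 5])               # 2 queries with 2 and 3 results
    D = np.array([0.1, 0.7, 0.2, 0.4, 0.9])
    I = np.array([3, 8, 1, 4, 6])
    new_lims, new_D, new_I = filter_range_results(lims, D, I, 0.5)
    assert np.all(new_lims == [0, 1, 3])     # 1 result left for q0, 2 for q1
    assert np.all(new_I == [3, 1, 4])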


def range_PR(lims_ref, Iref, lims_new, Inew, mode="overall"):
    """compute the precision and recall of range search results. The
    function does not take the distances into account. """

    def ref_result_for(i):
        return Iref[lims_ref[i]:lims_ref[i + 1]]

    def new_result_for(i):
        return Inew[lims_new[i]:lims_new[i + 1]]

    nq = lims_ref.size - 1
    assert lims_new.size - 1 == nq

    ninter = np.zeros(nq, dtype="int64")

    def compute_PR_for(q):

        # ground truth results for this query
        gt_ids = ref_result_for(q)

        # results for this query
        new_ids = new_result_for(q)

        # there are no set functions in numpy so let's do this
        inter = np.intersect1d(gt_ids, new_ids)

        ninter[q] = len(inter)

    # run in a thread pool, which helps in spite of the GIL
    pool = ThreadPool(20)
    pool.map(compute_PR_for, range(nq))

    return counts_to_PR(
        lims_ref[1:] - lims_ref[:-1],
        lims_new[1:] - lims_new[:-1],
        ninter,
        mode=mode
    )
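
# A toy call: the reference results are {3, 8} and {1, 4, 6}; the new result
# misses ids 8 and 6 and adds a spurious 9, giving precision 3/4, recall 3/5.
def _demo_range_PR():
    lims_ref = np.array([0, 2, 5])
    Iref = np.array([3, 8, 1, 4, 6])
    lims_new = np.array([0, 2, 4])
    Inew = np.array([3, 9, 1, 4])
    precision, recall = range_PR(lims_ref, Iref, lims_new, Inew)
    assert precision == 3 / 4 and recall == 3 / 5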


def counts_to_PR(ngt, nres, ninter, mode="overall"):
    """ computes a precision-recall for a set of queries.
    ngt = nb of GT results per query
    nres = nb of found results per query
    ninter = nb of correct results per query (smaller than nres of course)
    """

    if mode == "overall":
        ngt, nres, ninter = ngt.sum(), nres.sum(), ninter.sum()

        if nres > 0:
            precision = ninter / nres
        else:
            precision = 1.0

        if ngt > 0:
            recall = ninter / ngt
        elif nres == 0:
            recall = 1.0
        else:
            recall = 0.0

        return precision, recall

    elif mode == "average":
        # average precision and recall over queries

        mask = ngt == 0
        ngt[mask] = 1

        recalls = ninter / ngt
        recalls[mask] = (nres[mask] == 0).astype(float)

        # avoid division by 0
        mask = nres == 0
        assert np.all(ninter[mask] == 0)
        ninter[mask] = 1
        nres[mask] = 1

        precisions = ninter / nres

        return precisions.mean(), recalls.mean()

    else:
        raise AssertionError()
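
# The two modes differ when queries are unbalanced: "overall" pools the raw
# counts, "average" computes per-query values first. Toy counts below; note
# that mode="average" edits its integer arguments in place, hence the copies.
def _demo_counts_to_PR_modes():
    ngt = np.array([4, 1])     # ground-truth result counts per query
    nres = np.array([4, 2])    # returned result counts per query
    ninter = np.array([2, 1])  # correct result counts per query
    p, r = counts_to_PR(ngt, nres, ninter, mode="overall")
    assert p == 3 / 6 and r == 3 / 5
    p, r = counts_to_PR(ngt.copy(), nres.copy(), ninter.copy(), mode="average")
    assert p == (2 / 4 + 1 / 2) / 2 and r == (2 / 4 + 1 / 1) / 2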

def sort_range_res_2(lims, D, I):
    """ sort both result tables, using the distances D as the sorting key """
    I2 = np.empty_like(I)
    D2 = np.empty_like(D)
    nq = len(lims) - 1
    for i in range(nq):
        l0, l1 = lims[i], lims[i + 1]
        ii = I[l0:l1]
        di = D[l0:l1]
        o = di.argsort()
        I2[l0:l1] = ii[o]
        D2[l0:l1] = di[o]
    return I2, D2


def sort_range_res_1(lims, I):
    """ sort a result table by id, query per query """
    I2 = np.empty_like(I)
    nq = len(lims) - 1
    for i in range(nq):
        l0, l1 = lims[i], lims[i + 1]
        I2[l0:l1] = I[l0:l1]
        I2[l0:l1].sort()
    return I2


def range_PR_multiple_thresholds(
        lims_ref, Iref,
        lims_new, Dnew, Inew,
        thresholds,
        mode="overall", do_sort="ref,new"
):
    """ compute precision-recall values for range search results
    for several thresholds on the "new" results.
    This is to plot PR curves
    """
    # ref should be sorted by ids
    if "ref" in do_sort:
        Iref = sort_range_res_1(lims_ref, Iref)

    # new should be sorted by distances
    if "new" in do_sort:
        Inew, Dnew = sort_range_res_2(lims_new, Dnew, Inew)

    def ref_result_for(i):
        return Iref[lims_ref[i]:lims_ref[i + 1]]

    def new_result_for(i):
        l0, l1 = lims_new[i], lims_new[i + 1]
        return Inew[l0:l1], Dnew[l0:l1]

    nq = lims_ref.size - 1
    assert lims_new.size - 1 == nq

    nt = len(thresholds)
    counts = np.zeros((nq, nt, 3), dtype="int64")

    def compute_PR_for(q):
        gt_ids = ref_result_for(q)
        res_ids, res_dis = new_result_for(q)

        counts[q, :, 0] = len(gt_ids)

        if res_dis.size == 0:
            # the rest remains at 0
            return

        # which offsets we are interested in
        nres = np.searchsorted(res_dis, thresholds)
        counts[q, :, 1] = nres

        if gt_ids.size == 0:
            return

        # find number of TPs at each stage in the result list
        ii = np.searchsorted(gt_ids, res_ids)
        ii[ii == len(gt_ids)] = -1
        n_ok = np.cumsum(gt_ids[ii] == res_ids)

        # focus on threshold points
        n_ok = np.hstack(([0], n_ok))
        counts[q, :, 2] = n_ok[nres]

    pool = ThreadPool(20)
    pool.map(compute_PR_for, range(nq))
    # print(counts.transpose(2, 1, 0))

    precisions = np.zeros(nt)
    recalls = np.zeros(nt)
    for t in range(nt):
        p, r = counts_to_PR(
            counts[:, t, 0], counts[:, t, 1], counts[:, t, 2],
            mode=mode
        )
        precisions[t] = p
        recalls[t] = r

    return precisions, recalls
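
# A sketch of sweeping thresholds to trace a PR curve: one (precision,
# recall) point comes out per threshold. The arrays are toy values.
def _demo_range_PR_multiple_thresholds():
    lims_ref = np.array([0, 2])
    Iref = np.array([3, 8])
    lims_new = np.array([0, 3])
    Dnew = np.array([0.1, 0.3, 0.6])
    Inew = np.array([3, 9, 8])
    thresholds = np.array([0.2, 0.4, 0.7])
    precisions, recalls = range_PR_multiple_thresholds(
        lims_ref, Iref, lims_new, Dnew, Inew, thresholds)
    # at threshold 0.2 only id 3 is kept: precision 1, recall 0.5
    assert precisions[0] == 1.0 and recalls[0] == 0.5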


###############################################################
# Functions that compare search results with a reference result.
# They are intended for use in tests

def _cluster_tables_with_tolerance(tab1, tab2, thr):
    """ for two tables, cluster them by merging values closer than thr.
    Returns the cluster ids for each table element """
    tab = np.hstack([tab1, tab2])
    tab.sort()
    n = len(tab)
    diffs = np.ones(n)
    diffs[1:] = tab[1:] - tab[:-1]
    unique_vals = tab[diffs > thr]
    idx1 = np.searchsorted(unique_vals, tab1, side='right') - 1
    idx2 = np.searchsorted(unique_vals, tab2, side='right') - 1
    return idx1, idx2
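
# A toy illustration: with tolerance 0.01, the values 0.100 and 0.105 merge
# into one cluster while 0.2 gets its own, so the two tables compare equal.
def _demo_cluster_tables_with_tolerance():
    idx1, idx2 = _cluster_tables_with_tolerance(
        np.array([0.100, 0.2]), np.array([0.105, 0.2]), 0.01)
    assert np.all(idx1 == idx2)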

def check_ref_knn_with_draws(Dref, Iref, Dnew, Inew, rtol=1e-5):
    """ test that knn search results are identical, with possible ties.
    Raise if not. """
    np.testing.assert_allclose(Dref, Dnew, rtol=rtol)
    # here we have to be careful because of draws
    testcase = unittest.TestCase()   # because it makes nice error messages
    for i in range(len(Iref)):
        if np.all(Iref[i] == Inew[i]):  # easy case
            continue

        # otherwise collect elements per distance
        r = rtol * Dref[i].max()

        DrefC, DnewC = _cluster_tables_with_tolerance(Dref[i], Dnew[i], r)

        for dis in np.unique(DrefC):
            if dis == DrefC[-1]:
                continue
            mask = DrefC == dis
            testcase.assertEqual(set(Iref[i, mask]), set(Inew[i, mask]))


def check_ref_range_results(Lref, Dref, Iref,
                            Lnew, Dnew, Inew):
    """ compare range search results wrt. a reference result,
    throw if it fails """
    np.testing.assert_array_equal(Lref, Lnew)
    nq = len(Lref) - 1
    for i in range(nq):
        l0, l1 = Lref[i], Lref[i + 1]
        Ii_ref = Iref[l0:l1]
        Ii_new = Inew[l0:l1]
        Di_ref = Dref[l0:l1]
        Di_new = Dnew[l0:l1]
        if np.all(Ii_ref == Ii_new):  # easy
            pass
        else:
            def sort_by_ids(I, D):
                o = I.argsort()
                return I[o], D[o]
            # sort both
            (Ii_ref, Di_ref) = sort_by_ids(Ii_ref, Di_ref)
            (Ii_new, Di_new) = sort_by_ids(Ii_new, Di_new)
            np.testing.assert_array_equal(Ii_ref, Ii_new)
            np.testing.assert_array_almost_equal(Di_ref, Di_new, decimal=5)
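
# A hedged end-to-end sketch: build a small flat index and compare its
# results against themselves, which passes trivially; this only shows the
# calling convention (the sizes and the 1.0 radius are arbitrary).
def _demo_check_against_reference():
    rng = np.random.RandomState(0)
    xb = rng.rand(100, 8).astype('float32')
    xq = rng.rand(5, 8).astype('float32')
    index = faiss.IndexFlatL2(8)
    index.add(xb)
    Dref, Iref = index.search(xq, 10)
    check_ref_knn_with_draws(Dref, Iref, Dref, Iref)
    lims, D, I = index.range_search(xq, 1.0)
    check_ref_range_results(lims, D, I, lims, D, I)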


###############################################################
# OperatingPoints functions
# this is the Python version of the AutoTune object in C++

class OperatingPoints:
    """
    Manages a set of search parameters with associated performance and time.
    Keeps the Pareto optimal points.
    """

    def __init__(self):
        # list of (key, perf, t)
        self.operating_points = [
            # (self.do_nothing_key(), 0.0, 0.0)
        ]
        self.suboptimal_points = []

    def compare_keys(self, k1, k2):
        """ return 1 if k1 >= k2 elementwise, -1 if k2 >= k1,
        0 if they are not comparable """
        raise NotImplementedError

    def do_nothing_key(self):
        """ parameters to say we do nothing, takes 0 time and has 0 performance"""
        raise NotImplementedError

    def is_pareto_optimal(self, perf_new, t_new):
        for _, perf, t in self.operating_points:
            if perf >= perf_new and t <= t_new:
                return False
        return True

    def predict_bounds(self, key):
        """ predicts the bound on time and performance """
        min_time = 0.0
        max_perf = 1.0
        for key2, perf, t in self.operating_points + self.suboptimal_points:
            cmp = self.compare_keys(key, key2)
            if cmp > 0:  # key >= key2: key is at least as slow as key2
                if t > min_time:
                    min_time = t
            if cmp < 0:  # key2 >= key: key is at most as accurate as key2
                if perf < max_perf:
                    max_perf = perf
        return max_perf, min_time

    def should_run_experiment(self, key):
        (max_perf, min_time) = self.predict_bounds(key)
        return self.is_pareto_optimal(max_perf, min_time)

    def add_operating_point(self, key, perf, t):
        if self.is_pareto_optimal(perf, t):
            i = 0
            # maybe it shadows some other operating point completely?
            while i < len(self.operating_points):
                op_Ls, perf2, t2 = self.operating_points[i]
                if perf >= perf2 and t < t2:
                    self.suboptimal_points.append(
                        self.operating_points.pop(i))
                else:
                    i += 1
            self.operating_points.append((key, perf, t))
            return True
        else:
            self.suboptimal_points.append((key, perf, t))
            return False


class OperatingPointsWithRanges(OperatingPoints):
    """
    Set of parameters that are each picked from a discrete range of values.
    An increase of each parameter is assumed to make the operation slower
    and more accurate.
    A key = int array of indices in the ordered set of parameters.
    """

    def __init__(self):
        OperatingPoints.__init__(self)
        # list of (name, values)
        self.ranges = []

    def add_range(self, name, values):
        self.ranges.append((name, values))

    def compare_keys(self, k1, k2):
        if np.all(k1 >= k2):
            return 1
        if np.all(k2 >= k1):
            return -1
        return 0

    def do_nothing_key(self):
        return np.zeros(len(self.ranges), dtype=int)

    def num_experiments(self):
        return int(np.prod([len(values) for name, values in self.ranges]))

    def sample_experiments(self, n_autotune, rs=None):
        """ sample a set of experiments of max size n_autotune
        (run all experiments in random order if n_autotune is 0)
        """
        assert n_autotune == 0 or n_autotune >= 2
        totex = self.num_experiments()
        if rs is None:
            # fixed seed for reproducible experiment sets
            rs = np.random.RandomState(123)
        if n_autotune == 0 or totex < n_autotune:
            experiments = rs.permutation(totex - 2)
        else:
            experiments = rs.choice(
                totex - 2, size=n_autotune - 2, replace=False)

        # always include the two extreme operating points
        experiments = [0, totex - 1] + [int(cno) + 1 for cno in experiments]
        return experiments

    def cno_to_key(self, cno):
        """Convert a sequential experiment number to a key"""
        k = np.zeros(len(self.ranges), dtype=int)
        for i, (name, values) in enumerate(self.ranges):
            k[i] = cno % len(values)
            cno //= len(values)
        assert cno == 0
        return k

    def get_parameters(self, k):
        """Convert a key to a dictionary with parameter values"""
        return {
            name: values[k[i]]
            for i, (name, values) in enumerate(self.ranges)
        }

    def restrict_range(self, name, max_val):
        """ remove too large values from a range"""
        for name2, values in self.ranges:
            if name == name2:
                val2 = [v for v in values if v < max_val]
                values[:] = val2
                return
        raise RuntimeError(f"parameter {name} not found")
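
# A sketch of the intended autotune loop. The parameter names and the
# evaluate() callback (returning a (perf, time) pair for a parameter dict)
# are placeholders, not part of this module.
def _demo_operating_points(evaluate):
    op = OperatingPointsWithRanges()
    op.add_range("nprobe", [1, 2, 4, 8])
    op.add_range("efSearch", [16, 32, 64])
    for cno in op.sample_experiments(n_autotune=8):
        key = op.cno_to_key(cno)
        # skip keys whose best possible outcome is already dominated
        if not op.should_run_experiment(key):
            continue
        parameters = op.get_parameters(key)  # e.g. {"nprobe": 2, "efSearch": 16}
        perf, t = evaluate(parameters)
        op.add_operating_point(key, perf, t)
    return op.operating_points  # the Pareto-optimal (key, perf, t) triples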


###############################################################
# Timer object

class TimerIter:
    def __init__(self, timer):
        self.ts = []
        self.runs = timer.runs
        self.timer = timer
        if timer.nt >= 0:
            faiss.omp_set_num_threads(timer.nt)

    def __next__(self):
        timer = self.timer
        self.runs -= 1
        self.ts.append(time.time())
        total_time = self.ts[-1] - self.ts[0] if len(self.ts) >= 2 else 0
        if self.runs == -1 or total_time > timer.max_secs:
            if timer.nt >= 0:
                faiss.omp_set_num_threads(timer.remember_nt)
            ts = np.array(self.ts)
            times = ts[1:] - ts[:-1]
            if len(times) == timer.runs:
                timer.times = times[timer.warmup:]
            else:
                # if timeout, we use all the runs
                timer.times = times[:]
            raise StopIteration


class RepeatTimer:
    """
    This is yet another timer object. It is adapted to Faiss by
    taking a number of openmp threads to set on input. It should be called
    in an explicit loop as:

    timer = RepeatTimer(warmup=1, nt=1, runs=6)

    for _ in timer:
        # perform operation

    print(f"time={timer.ms():.1f} ± {timer.ms_std():.1f} ms")

    the same timer can be re-used. In that case it is reset each time it
    enters a loop. It focuses on ms-scale times because for second scale
    it's usually less relevant to repeat the operation.
    """
    def __init__(self, warmup=0, nt=-1, runs=1, max_secs=np.inf):
        assert warmup < runs
        self.warmup = warmup
        self.nt = nt
        self.runs = runs
        self.max_secs = max_secs
        self.remember_nt = faiss.omp_get_max_threads()

    def __iter__(self):
        return TimerIter(self)

    def ms(self):
        return np.mean(self.times) * 1000

    def ms_std(self):
        return np.std(self.times) * 1000 if len(self.times) > 1 else 0.0

    def nruns(self):
        """ effective number of runs (may be lower than runs - warmup due to timeout)"""
        return len(self.times)
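
# A minimal sketch of the timer in use; the matrix product stands in for
# whatever Faiss call is being benchmarked.
def _demo_repeat_timer():
    x = np.random.rand(256, 256)
    timer = RepeatTimer(warmup=1, runs=5)
    for _ in timer:
        x @ x
    print(f"time={timer.ms():.1f} ± {timer.ms_std():.1f} ms over {timer.nruns()} runs")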