Initial commit
packages/leann-backend-hnsw/third_party/faiss/benchs/bench_fw/__init__.py | 0 lines (vendored, new file)
packages/leann-backend-hnsw/third_party/faiss/benchs/bench_fw/benchmark.py | 1219 lines (vendored, new file; diff suppressed because it is too large)
packages/leann-backend-hnsw/third_party/faiss/benchs/bench_fw/benchmark_io.py | 272 lines (vendored, new file)
@@ -0,0 +1,272 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import hashlib
import io
import json
import logging
import os
import pickle
from dataclasses import dataclass
from typing import Any, Dict, List, Optional
from zipfile import ZipFile

import faiss  # @manual=//faiss/python:pyfaiss

import numpy as np
import submitit
from faiss.contrib.datasets import (  # @manual=//faiss/contrib:faiss_contrib
    dataset_from_name,
)

logger = logging.getLogger(__name__)


# merge RCQ coarse quantizer and ITQ encoder to one Faiss index
def merge_rcq_itq(
    # pyre-ignore[11]: `faiss.ResidualCoarseQuantizer` is not defined as a type
    rcq_coarse_quantizer: faiss.ResidualCoarseQuantizer,
    itq_encoder: faiss.IndexPreTransform,
    # pyre-ignore[11]: `faiss.IndexIVFSpectralHash` is not defined as a type.
) -> faiss.IndexIVFSpectralHash:
    # pyre-ignore[16]: `faiss` has no attribute `IndexIVFSpectralHash`.
    index = faiss.IndexIVFSpectralHash(
        rcq_coarse_quantizer,
        rcq_coarse_quantizer.d,
        rcq_coarse_quantizer.ntotal,
        itq_encoder.sa_code_size() * 8,
        1000000,  # larger than the magnitude of the vectors
    )
    index.replace_vt(itq_encoder)
    return index


@dataclass
class BenchmarkIO:
    path: str  # local path

    def __init__(self, path: str):
        self.path = path
        self.cached_ds: Dict[Any, Any] = {}

    def clone(self):
        return BenchmarkIO(path=self.path)

    def get_local_filepath(self, filename):
        if len(filename) > 184:
            fn, ext = os.path.splitext(filename)
            filename = (
                fn[:184] + hashlib.sha256(filename.encode()).hexdigest() + ext
            )
        return os.path.join(self.path, filename)

    def get_remote_filepath(self, filename) -> Optional[str]:
        return None

    def download_file_from_blobstore(
        self,
        filename: str,
        bucket: Optional[str] = None,
        path: Optional[str] = None,
    ):
        return self.get_local_filepath(filename)

    def upload_file_to_blobstore(
        self,
        filename: str,
        bucket: Optional[str] = None,
        path: Optional[str] = None,
        overwrite: bool = False,
    ):
        pass

    def file_exist(self, filename: str):
        fn = self.get_local_filepath(filename)
        exists = os.path.exists(fn)
        logger.info(f"{filename} {exists=}")
        return exists

    def read_file(self, filename: str, keys: List[str]):
        fn = self.download_file_from_blobstore(filename)
        logger.info(f"Loading file {fn}")
        results = []
        with ZipFile(fn, "r") as zip_file:
            for key in keys:
                with zip_file.open(key, "r") as f:
                    if key in ["D", "I", "R", "lims"]:
                        results.append(np.load(f))
                    elif key in ["P"]:
                        t = io.TextIOWrapper(f)
                        results.append(json.load(t))
                    else:
                        raise AssertionError()
        return results

    def write_file(
        self,
        filename: str,
        keys: List[str],
        values: List[Any],
        overwrite: bool = False,
    ):
        fn = self.get_local_filepath(filename)
        with ZipFile(fn, "w") as zip_file:
            for key, value in zip(keys, values, strict=True):
                with zip_file.open(key, "w", force_zip64=True) as f:
                    if key in ["D", "I", "R", "lims"]:
                        np.save(f, value)
                    elif key in ["P"]:
                        t = io.TextIOWrapper(f, write_through=True)
                        json.dump(value, t)
                    else:
                        raise AssertionError()
        self.upload_file_to_blobstore(filename, overwrite=overwrite)

    def get_dataset(self, dataset):
        if dataset not in self.cached_ds:
            if (
                dataset.namespace is not None
                and dataset.namespace[:4] == "std_"
            ):
                if dataset.tablename not in self.cached_ds:
                    self.cached_ds[dataset.tablename] = dataset_from_name(
                        dataset.tablename,
                    )
                p = dataset.namespace[4]
                if p == "t":
                    self.cached_ds[dataset] = self.cached_ds[
                        dataset.tablename
                    ].get_train(dataset.num_vectors)
                elif p == "d":
                    self.cached_ds[dataset] = self.cached_ds[
                        dataset.tablename
                    ].get_database()
                elif p == "q":
                    self.cached_ds[dataset] = self.cached_ds[
                        dataset.tablename
                    ].get_queries()
                else:
                    raise ValueError
            elif dataset.namespace == "syn":
                d, seed = dataset.tablename.split("_")
                d = int(d)
                seed = int(seed)
                n = dataset.num_vectors
                # based on faiss.contrib.datasets.SyntheticDataset
                d1 = 10
                rs = np.random.RandomState(seed)
                x = rs.normal(size=(n, d1))
                x = np.dot(x, rs.rand(d1, d))
                x = x * (rs.rand(d) * 4 + 0.1)
                x = np.sin(x)
                x = x.astype(np.float32)
                self.cached_ds[dataset] = x
            else:
                self.cached_ds[dataset] = self.read_nparray(
                    os.path.join(self.path, dataset.tablename),
                    mmap_mode="r",
                )[: dataset.num_vectors].copy()
        return self.cached_ds[dataset]

    def read_nparray(
        self,
        filename: str,
        mmap_mode: Optional[str] = None,
    ):
        fn = self.download_file_from_blobstore(filename)
        logger.info(f"Loading nparray from {fn}")
        nparray = np.load(fn, mmap_mode=mmap_mode)
        logger.info(f"Loaded nparray {nparray.shape} from {fn}")
        return nparray

    def write_nparray(
        self,
        nparray: np.ndarray,
        filename: str,
    ):
        fn = self.get_local_filepath(filename)
        logger.info(f"Saving nparray {nparray.shape} to {fn}")
        np.save(fn, nparray)
        self.upload_file_to_blobstore(filename)

    def read_json(
        self,
        filename: str,
    ):
        fn = self.download_file_from_blobstore(filename)
        logger.info(f"Loading json {fn}")
        with open(fn, "r") as fp:
            json_dict = json.load(fp)
        logger.info(f"Loaded json {json_dict} from {fn}")
        return json_dict

    def write_json(
        self,
        json_dict: dict[str, Any],
        filename: str,
        overwrite: bool = False,
    ):
        fn = self.get_local_filepath(filename)
        logger.info(f"Saving json {json_dict} to {fn}")
        with open(fn, "w") as fp:
            json.dump(json_dict, fp)
        self.upload_file_to_blobstore(filename, overwrite=overwrite)

    def read_index(
        self,
        filename: str,
        bucket: Optional[str] = None,
        path: Optional[str] = None,
    ):
        fn = self.download_file_from_blobstore(filename, bucket, path)
        logger.info(f"Loading index {fn}")
        ext = os.path.splitext(fn)[1]
        if ext in [".faiss", ".codec", ".index"]:
            index = faiss.read_index(fn)
        elif ext == ".pkl":
            with open(fn, "rb") as model_file:
                model = pickle.load(model_file)
                rcq_coarse_quantizer, itq_encoder = model["model"]
                index = merge_rcq_itq(rcq_coarse_quantizer, itq_encoder)
        logger.info(f"Loaded index from {fn}")
        return index

    def write_index(
        self,
        index: faiss.Index,
        filename: str,
    ):
        fn = self.get_local_filepath(filename)
        logger.info(f"Saving index to {fn}")
        faiss.write_index(index, fn)
        self.upload_file_to_blobstore(filename)
        assert os.path.exists(fn)
        return os.path.getsize(fn)

    def launch_jobs(self, func, params, local=True):
        if local:
            results = [func(p) for p in params]
            return results
        logger.info(f"launching {len(params)} jobs")
        executor = submitit.AutoExecutor(folder="/checkpoint/gsz/jobs")
        executor.update_parameters(
            nodes=1,
            gpus_per_node=8,
            cpus_per_task=80,
            # mem_gb=640,
            tasks_per_node=1,
            name="faiss_benchmark",
            slurm_array_parallelism=512,
            slurm_partition="scavenge",
            slurm_time=4 * 60,
            slurm_constraint="bldg1",
        )
        jobs = executor.map_array(func, params)
        logger.info(f"launched {len(jobs)} jobs")
        for job, param in zip(jobs, params):
            logger.info(f"{job.job_id=} {param[0]=}")
        results = [job.result() for job in jobs]
        print(f"received {len(results)} results")
        return results
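
For orientation, a minimal usage sketch of BenchmarkIO follows. It is not part of the commit: the working directory "/tmp/bench_fw", the toy data, and the `bench_fw` import path are illustrative assumptions about how the vendored package would be imported.

# Minimal BenchmarkIO sketch (illustrative only; path and data are made up).
import numpy as np
from bench_fw.benchmark_io import BenchmarkIO  # assumed import path

io = BenchmarkIO(path="/tmp/bench_fw")  # all local files live under this directory
xb = np.random.rand(1000, 64).astype(np.float32)
io.write_nparray(xb, "toy_vectors.npy")                  # /tmp/bench_fw/toy_vectors.npy
io.write_json({"nb": 1000, "d": 64}, "toy_meta.json")

loaded = io.read_nparray("toy_vectors.npy", mmap_mode="r")
meta = io.read_json("toy_meta.json")

# launch_jobs(local=True) just maps the function over the parameters in-process;
# local=False would submit a submitit/SLURM array instead.
sums = io.launch_jobs(lambda p: float(np.sum(p)), [xb[:10], xb[10:20]], local=True)
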
packages/leann-backend-hnsw/third_party/faiss/benchs/bench_fw/descriptors.py | 379 lines (vendored, new file)
@@ -0,0 +1,379 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
import os
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

import faiss  # @manual=//faiss/python:pyfaiss

from .benchmark_io import BenchmarkIO
from .utils import timer

logger = logging.getLogger(__name__)


# Important: filenames end with "." and carry no extension (npy, codec, index);
# when writing files you must append the extension yourself, e.g. filename + "npy".

@dataclass
class IndexDescriptorClassic:
    bucket: Optional[str] = None
    # either path or factory should be set,
    # but not both at the same time.
    path: Optional[str] = None
    factory: Optional[str] = None
    codec_alias: Optional[str] = None
    construction_params: Optional[List[Dict[str, int]]] = None
    search_params: Optional[Dict[str, int]] = None
    # range metric definitions
    # key: name
    # value: one of the following:
    #
    # radius
    #    [0..radius) -> 1
    #    [radius..inf) -> 0
    #
    # [[radius1, score1], ...]
    #    [0..radius1) -> score1
    #    [radius1..radius2) -> score2
    #
    # [[radius1_from, radius1_to, score1], ...]
    #    [radius1_from, radius1_to) -> score1,
    #    [radius2_from, radius2_to) -> score2
    range_metrics: Optional[Dict[str, Any]] = None
    radius: Optional[float] = None
    training_size: Optional[int] = None

    def __hash__(self):
        return hash(str(self))


@dataclass
class DatasetDescriptor:
    # namespace possible values:
    # 1. a hive namespace
    # 2. 'std_t', 'std_d', 'std_q' for the standard datasets
    #    via faiss.contrib.datasets.dataset_from_name()
    #    t - training, d - database, q - queries
    #    eg. "std_t"
    # 3. 'syn' for synthetic data
    # 4. None for local files
    namespace: Optional[str] = None

    # tablename possible values, corresponding to the
    # namespace value above:
    # 1. a hive table name
    # 2. name of the standard dataset as recognized
    #    by faiss.contrib.datasets.dataset_from_name()
    #    eg. "bigann1M"
    # 3. d_seed, eg. 128_1234 for 128 dimensional vectors
    #    with seed 1234
    # 4. a local file name (relative to benchmark_io.path)
    tablename: Optional[str] = None

    # partition names and values for hive
    # eg. ["ds=2021-09-01"]
    partitions: Optional[List[str]] = None

    # number of vectors to load from the dataset
    num_vectors: Optional[int] = None

    embedding_column: Optional[str] = None

    # only when the embedding column is a map
    embedding_column_key: Optional[Any] = None

    embedding_id_column: Optional[str] = None

    # filters on the dataset where each filter is a
    # string rep of a filter expression
    filters: Optional[List[str]] = None

    # unused in open-source
    splits_distribution: Optional[List[List[bytes]]] = None

    # unused in open-source
    splits: Optional[List[bytes]] = None

    # unused in open-source
    serialized_df: Optional[str] = None

    sampling_rate: Optional[float] = None

    # sampling column for xdb
    sampling_column: Optional[str] = None

    # blob store
    bucket: Optional[str] = None
    path: Optional[str] = None

    # desc_name
    desc_name: Optional[str] = None

    normalize_L2: bool = False

    def __hash__(self):
        return hash(self.get_filename())

    def get_filename(
        self,
        prefix: Optional[str] = None,
    ) -> str:
        if self.desc_name is not None:
            return self.desc_name

        filename = ""
        if prefix is not None:
            filename += prefix + "_"
        if self.namespace is not None:
            filename += self.namespace + "_"
        assert self.tablename is not None
        filename += self.tablename
        if self.partitions is not None:
            filename += "_" + "_".join(
                self.partitions
            ).replace("=", "_").replace("/", "_")
        if self.num_vectors is not None:
            filename += f"_{self.num_vectors}"
        filename += "."

        self.desc_name = filename
        return self.desc_name

    def get_kmeans_filename(self, k):
        return f"{self.get_filename()}kmeans_{k}."

    def k_means(self, io, k, dry_run):
        logger.info(f"k_means {k} {self}")
        kmeans_vectors = DatasetDescriptor(
            tablename=f"{self.get_filename()}kmeans_{k}"
        )
        kmeans_filename = kmeans_vectors.get_filename() + "npy"
        meta_filename = kmeans_vectors.get_filename() + "json"
        if not io.file_exist(kmeans_filename) or not io.file_exist(
            meta_filename
        ):
            if dry_run:
                return None, None, kmeans_filename
            x = io.get_dataset(self)
            kmeans = faiss.Kmeans(d=x.shape[1], k=k, gpu=True)
            _, t, _ = timer("k_means", lambda: kmeans.train(x))
            io.write_nparray(kmeans.centroids, kmeans_filename)
            io.write_json({"k_means_time": t}, meta_filename)
        else:
            t = io.read_json(meta_filename)["k_means_time"]
        return kmeans_vectors, t, None


@dataclass
class IndexBaseDescriptor:
    d: int
    metric: str
    desc_name: Optional[str] = None
    flat_desc_name: Optional[str] = None
    bucket: Optional[str] = None
    path: Optional[str] = None
    num_threads: int = 1

    def get_name(self) -> str:
        raise NotImplementedError()

    def get_path(self, benchmark_io: BenchmarkIO) -> Optional[str]:
        if self.path is not None:
            return self.path
        self.path = benchmark_io.get_remote_filepath(self.desc_name)
        return self.path

    @staticmethod
    def param_dict_list_to_name(param_dict_list):
        if not param_dict_list:
            return ""
        l = 0
        n = ""
        for param_dict in param_dict_list:
            n += IndexBaseDescriptor.param_dict_to_name(param_dict, f"cp{l}")
            l += 1
        return n

    @staticmethod
    def param_dict_to_name(param_dict, prefix="sp"):
        if not param_dict:
            return ""
        n = prefix
        for name, val in param_dict.items():
            if name == "snap":
                continue
            if name == "lsq_gpu" and val == 0:
                continue
            if name == "use_beam_LUT" and val == 0:
                continue
            n += f"_{name}_{val}"
        if n == prefix:
            return ""
        n += "."
        return n


@dataclass
class CodecDescriptor(IndexBaseDescriptor):
    # either path or factory should be set,
    # but not both at the same time.
    factory: Optional[str] = None
    construction_params: Optional[List[Dict[str, int]]] = None
    training_vectors: Optional[DatasetDescriptor] = None
    FILENAME_PREFIX: str = "xt"

    def __post_init__(self):
        self.get_name()

    def is_trained(self):
        return self.factory is None and self.path is not None

    def is_valid(self):
        return self.factory is not None or self.path is not None

    def get_name(self) -> str:
        if self.desc_name is not None:
            return self.desc_name
        if self.factory is not None:
            self.desc_name = self.name_from_factory()
            return self.desc_name
        if self.path is not None:
            self.desc_name = self.name_from_path()
            return self.desc_name
        raise ValueError("name, factory or path must be set")

    def flat_name(self) -> str:
        if self.flat_desc_name is not None:
            return self.flat_desc_name
        self.flat_desc_name = f"Flat.d_{self.d}.{self.metric.upper()}."
        return self.flat_desc_name

    def path(self, benchmark_io) -> str:
        if self.path is not None:
            return self.path
        return benchmark_io.get_remote_filepath(self.get_name())

    def name_from_factory(self) -> str:
        assert self.factory is not None
        name = f"{self.factory.replace(',', '_')}."
        assert self.d is not None
        assert self.metric is not None
        name += f"d_{self.d}.{self.metric.upper()}."
        if self.factory != "Flat":
            assert self.training_vectors is not None
            name += self.training_vectors.get_filename(CodecDescriptor.FILENAME_PREFIX)
        name += IndexBaseDescriptor.param_dict_list_to_name(self.construction_params)
        return name

    def name_from_path(self):
        assert self.path is not None
        filename = os.path.basename(self.path)
        ext = filename.split(".")[-1]
        if filename.endswith(ext):
            name = filename[:-len(ext)]
        else:  # should never be hit; would rather raise a ValueError
            name = filename
        return name

    def alias(self, benchmark_io: BenchmarkIO):
        if hasattr(benchmark_io, "bucket"):
            return CodecDescriptor(desc_name=self.get_name(), bucket=benchmark_io.bucket, path=self.get_path(benchmark_io), d=self.d, metric=self.metric)
        return CodecDescriptor(desc_name=self.get_name(), d=self.d, metric=self.metric)


@dataclass
class IndexDescriptor(IndexBaseDescriptor):
    codec_desc: Optional[CodecDescriptor] = None
    database_desc: Optional[DatasetDescriptor] = None
    FILENAME_PREFIX: str = "xb"

    def __hash__(self):
        return hash(str(self))

    def __post_init__(self):
        self.get_name()

    def is_built(self):
        return self.codec_desc is None and self.database_desc is None

    def get_name(self) -> str:
        if self.desc_name is None:
            self.desc_name = self.codec_desc.get_name() + self.database_desc.get_filename(prefix=IndexDescriptor.FILENAME_PREFIX)

        return self.desc_name

    def flat_name(self):
        if self.flat_desc_name is not None:
            return self.flat_desc_name
        self.flat_desc_name = self.codec_desc.flat_name() + self.database_desc.get_filename(prefix=IndexDescriptor.FILENAME_PREFIX)
        return self.flat_desc_name

    # alias is used to refer to the index once it has been uploaded to the
    # blob store and is referred to again
    def alias(self, benchmark_io: BenchmarkIO):
        if hasattr(benchmark_io, "bucket"):
            return IndexDescriptor(desc_name=self.get_name(), bucket=benchmark_io.bucket, path=self.get_path(benchmark_io), d=self.d, metric=self.metric)
        return IndexDescriptor(desc_name=self.get_name(), d=self.d, metric=self.metric)


@dataclass
class KnnDescriptor(IndexBaseDescriptor):
    index_desc: Optional[IndexDescriptor] = None
    gt_index_desc: Optional[IndexDescriptor] = None
    query_dataset: Optional[DatasetDescriptor] = None
    search_params: Optional[Dict[str, int]] = None
    reconstruct: bool = False
    FILENAME_PREFIX: str = "q"
    # range metric definitions
    # key: name
    # value: one of the following:
    #
    # radius
    #    [0..radius) -> 1
    #    [radius..inf) -> 0
    #
    # [[radius1, score1], ...]
    #    [0..radius1) -> score1
    #    [radius1..radius2) -> score2
    #
    # [[radius1_from, radius1_to, score1], ...]
    #    [radius1_from, radius1_to) -> score1,
    #    [radius2_from, radius2_to) -> score2
    range_metrics: Optional[Dict[str, Any]] = None
    radius: Optional[float] = None
    k: int = 1

    range_ref_index_desc: Optional[str] = None

    def __hash__(self):
        return hash(str(self))

    def get_name(self):
        if self.desc_name is not None:
            return self.desc_name
        name = self.index_desc.get_name()
        name += IndexBaseDescriptor.param_dict_to_name(self.search_params)
        name += self.query_dataset.get_filename(KnnDescriptor.FILENAME_PREFIX)
        name += f"k_{self.k}."
        name += f"t_{self.num_threads}."
        if self.reconstruct:
            name += "rec."
        else:
            name += "knn."
        self.desc_name = name
        return name

    def flat_name(self):
        if self.flat_desc_name is not None:
            return self.flat_desc_name
        name = self.index_desc.flat_name()
        name += self.query_dataset.get_filename(KnnDescriptor.FILENAME_PREFIX)
        name += f"k_{self.k}."
        name += f"t_{self.num_threads}."
        if self.reconstruct:
            name += "rec."
        else:
            name += "knn."
        self.flat_desc_name = name
        return name
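
To make the filename convention above concrete, here is a short sketch. It is not part of the commit; the dataset name, vector count, and factory string are illustrative, and the `bench_fw` import path is an assumption.

# Descriptor naming sketch (values are made up).
from bench_fw.descriptors import DatasetDescriptor, IndexDescriptorClassic  # assumed import path

train = DatasetDescriptor(namespace="std_t", tablename="bigann1M", num_vectors=100_000)
print(train.get_filename(prefix="xt"))  # "xt_std_t_bigann1M_100000."  (trailing ".", no extension)

# Filenames end with "." and no extension; callers append "npy"/"json" themselves:
centroids_npy = train.get_kmeans_filename(4096) + "npy"

# A classic index descriptor pairs a factory string with construction/search params:
index_desc = IndexDescriptorClassic(
    factory="OPQ32_128,IVF4096,PQ32",
    search_params={"nprobe": 64},
)
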
packages/leann-backend-hnsw/third_party/faiss/benchs/bench_fw/index.py | 1146 lines (vendored, new file; diff suppressed because it is too large)
packages/leann-backend-hnsw/third_party/faiss/benchs/bench_fw/optimize.py | 335 lines (vendored, new file)
@@ -0,0 +1,335 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import logging
from dataclasses import dataclass
from typing import Dict, List, Tuple

import faiss  # @manual=//faiss/python:pyfaiss

# from faiss.contrib.evaluation import (  # @manual=//faiss/contrib:faiss_contrib
#     OperatingPoints,
# )

from .benchmark import Benchmark
from .descriptors import DatasetDescriptor, IndexDescriptorClassic
from .utils import dict_merge, filter_results, ParetoMetric, ParetoMode

logger = logging.getLogger(__name__)


@dataclass
class Optimizer:
    distance_metric: str = "L2"
    num_threads: int = 32
    run_local: bool = True

    def __post_init__(self):
        self.cached_benchmark = None
        if self.distance_metric == "IP":
            self.distance_metric_type = faiss.METRIC_INNER_PRODUCT
        elif self.distance_metric == "L2":
            self.distance_metric_type = faiss.METRIC_L2
        else:
            raise ValueError

    def set_io(self, benchmark_io):
        self.io = benchmark_io
        self.io.distance_metric = self.distance_metric
        self.io.distance_metric_type = self.distance_metric_type

    def benchmark_and_filter_candidates(
        self,
        index_descs,
        training_vectors,
        database_vectors,
        query_vectors,
        result_file,
        include_flat,
        min_accuracy,
        pareto_metric,
    ):
        benchmark = Benchmark(
            num_threads=self.num_threads,
            training_vectors=training_vectors,
            database_vectors=database_vectors,
            query_vectors=query_vectors,
            index_descs=index_descs,
            k=10,
            distance_metric=self.distance_metric,
        )
        benchmark.set_io(self.io)
        results = benchmark.benchmark(
            result_file=result_file, local=self.run_local, train=True, knn=True
        )
        assert results
        filtered = filter_results(
            results=results,
            evaluation="knn",
            accuracy_metric="knn_intersection",
            min_accuracy=min_accuracy,
            name_filter=None
            if include_flat
            else (lambda n: not n.startswith("Flat")),
            pareto_mode=ParetoMode.GLOBAL,
            pareto_metric=pareto_metric,
        )
        assert filtered
        index_descs = [
            IndexDescriptorClassic(
                factory=v["factory"],
                construction_params=v["construction_params"],
                search_params=v["search_params"],
            )
            for _, _, _, _, v in filtered
        ]
        return index_descs, filtered

    def optimize_quantizer(
        self,
        training_vectors: DatasetDescriptor,
        query_vectors: DatasetDescriptor,
        nlists: List[int],
        min_accuracy: float,
    ):
        quantizer_descs = {}
        for nlist in nlists:
            # cluster
            centroids, _, _ = training_vectors.k_means(
                self.io,
                nlist,
                dry_run=False,
            )

            descs = [IndexDescriptorClassic(factory="Flat"),] + [
                IndexDescriptorClassic(
                    factory="HNSW32",
                    construction_params=[{"efConstruction": 2**i}],
                )
                for i in range(6, 11)
            ]

            descs, _ = self.benchmark_and_filter_candidates(
                descs,
                training_vectors=centroids,
                database_vectors=centroids,
                query_vectors=query_vectors,
                result_file=f"result_{centroids.get_filename()}json",
                include_flat=True,
                min_accuracy=min_accuracy,
                pareto_metric=ParetoMetric.TIME,
            )
            quantizer_descs[nlist] = descs

        return quantizer_descs

    def optimize_ivf(
        self,
        result_file: str,
        training_vectors: DatasetDescriptor,
        database_vectors: DatasetDescriptor,
        query_vectors: DatasetDescriptor,
        quantizers: Dict[int, List[IndexDescriptorClassic]],
        codecs: List[Tuple[str, str]],
        min_accuracy: float,
    ):
        ivf_descs = []
        for nlist, quantizer_descs in quantizers.items():
            # build IVF index
            for quantizer_desc in quantizer_descs:
                for pretransform, fine_ivf in codecs:
                    if pretransform is None:
                        pretransform = ""
                    else:
                        pretransform = pretransform + ","
                    if quantizer_desc.construction_params is None:
                        construction_params = [
                            None,
                            quantizer_desc.search_params,
                        ]
                    else:
                        construction_params = [
                            None
                        ] + quantizer_desc.construction_params
                        if quantizer_desc.search_params is not None:
                            dict_merge(
                                construction_params[1],
                                quantizer_desc.search_params,
                            )
                    ivf_descs.append(
                        IndexDescriptorClassic(
                            factory=f"{pretransform}IVF{nlist}({quantizer_desc.factory}),{fine_ivf}",
                            construction_params=construction_params,
                        )
                    )
        return self.benchmark_and_filter_candidates(
            ivf_descs,
            training_vectors,
            database_vectors,
            query_vectors,
            result_file,
            include_flat=False,
            min_accuracy=min_accuracy,
            pareto_metric=ParetoMetric.TIME_SPACE,
        )

    # train an IVFFlat index
    # find the nprobe required for the given accuracy
    def ivf_flat_nprobe_required_for_accuracy(
        self,
        result_file: str,
        training_vectors: DatasetDescriptor,
        database_vectors: DatasetDescriptor,
        query_vectors: DatasetDescriptor,
        nlist,
        accuracy,
    ):
        _, results = self.benchmark_and_filter_candidates(
            index_descs=[
                IndexDescriptorClassic(factory=f"IVF{nlist}(Flat),Flat"),
            ],
            training_vectors=training_vectors,
            database_vectors=database_vectors,
            query_vectors=query_vectors,
            result_file=result_file,
            include_flat=False,
            min_accuracy=accuracy,
            pareto_metric=ParetoMetric.TIME,
        )
        nprobe = nlist // 2
        for _, _, _, k, v in results:
            if (
                ".knn" in k
                and "nprobe" in v["search_params"]
                and v["knn_intersection"] >= accuracy
            ):
                nprobe = min(nprobe, v["search_params"]["nprobe"])
        return nprobe

    # train candidate IVF codecs
    # benchmark them at the same nprobe
    # keep only the space _and_ time Pareto optimal
    def optimize_codec(
        self,
        result_file: str,
        d: int,
        training_vectors: DatasetDescriptor,
        database_vectors: DatasetDescriptor,
        query_vectors: DatasetDescriptor,
        nlist: int,
        nprobe: int,
        min_accuracy: float,
    ):
        codecs = (
            [
                (None, "Flat"),
                (None, "SQfp16"),
                (None, "SQbf16"),
                (None, "SQ8"),
                (None, "SQ8_direct_signed"),
            ] + [
                (f"OPQ{M}_{M * dim}", f"PQ{M}x{b}")
                for M in [8, 12, 16, 32, 48, 64, 96, 128, 192, 256]
                if d % M == 0
                for dim in range(2, 18, 2)
                if M * dim <= d
                for b in range(4, 14, 2)
                if M * b < d * 8  # smaller than SQ8
            ] + [
                (None, f"PQ{M}x{b}")
                for M in [8, 12, 16, 32, 48, 64, 96, 128, 192, 256]
                if d % M == 0
                for b in range(8, 14, 2)
                if M * b < d * 8  # smaller than SQ8
            ]
        )
        factory = {}
        for opq, pq in codecs:
            factory[
                f"IVF{nlist},{pq}" if opq is None else f"{opq},IVF{nlist},{pq}"
            ] = (
                opq,
                pq,
            )

        _, filtered = self.benchmark_and_filter_candidates(
            index_descs=[
                IndexDescriptorClassic(
                    factory=f"IVF{nlist},{pq}"
                    if opq is None
                    else f"{opq},IVF{nlist},{pq}",
                    search_params={
                        "nprobe": nprobe,
                    },
                )
                for opq, pq in codecs
            ],
            training_vectors=training_vectors,
            database_vectors=database_vectors,
            query_vectors=query_vectors,
            result_file=result_file,
            include_flat=False,
            min_accuracy=min_accuracy,
            pareto_metric=ParetoMetric.TIME_SPACE,
        )
        results = [
            factory[r] for r in set(v["factory"] for _, _, _, k, v in filtered)
        ]
        return results

    def optimize(
        self,
        d: int,
        training_vectors: DatasetDescriptor,
        database_vectors_list: List[DatasetDescriptor],
        query_vectors: DatasetDescriptor,
        min_accuracy: float,
    ):
        # train an IVFFlat index
        # find the nprobe required for near perfect accuracy
        nlist = 4096
        nprobe_at_95 = self.ivf_flat_nprobe_required_for_accuracy(
            result_file=f"result_ivf{nlist}_flat.json",
            training_vectors=training_vectors,
            database_vectors=database_vectors_list[0],
            query_vectors=query_vectors,
            nlist=nlist,
            accuracy=0.95,
        )

        # train candidate IVF codecs
        # benchmark them at the same nprobe
        # keep only the space and time Pareto optima
        codecs = self.optimize_codec(
            result_file=f"result_ivf{nlist}_codec.json",
            d=d,
            training_vectors=training_vectors,
            database_vectors=database_vectors_list[0],
            query_vectors=query_vectors,
            nlist=nlist,
            nprobe=nprobe_at_95,
            min_accuracy=min_accuracy,
        )

        # optimize coarse quantizers
        quantizers = self.optimize_quantizer(
            training_vectors=training_vectors,
            query_vectors=query_vectors,
            nlists=[4096, 8192, 16384, 32768],
            min_accuracy=0.7,
        )

        # combine them with the codecs
        # test them at different scales
        for database_vectors in database_vectors_list:
            self.optimize_ivf(
                result_file=f"result_{database_vectors.get_filename()}json",
                training_vectors=training_vectors,
                database_vectors=database_vectors,
                query_vectors=query_vectors,
                quantizers=quantizers,
                codecs=codecs,
                min_accuracy=min_accuracy,
            )
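
A sketch of driving the optimizer end to end follows. It is not part of the commit: the dataset name, dimensionality, vector counts, local path, and import paths are placeholder assumptions; the actual run depends on the Benchmark class in benchmark.py and on the datasets being available to faiss.contrib.datasets.

# Optimizer driver sketch (all values are placeholders).
from bench_fw.benchmark_io import BenchmarkIO  # assumed import path
from bench_fw.descriptors import DatasetDescriptor
from bench_fw.optimize import Optimizer

optimizer = Optimizer(distance_metric="L2", num_threads=32, run_local=True)
optimizer.set_io(BenchmarkIO(path="/tmp/bench_fw"))
optimizer.optimize(
    d=96,  # vector dimensionality of the chosen dataset
    training_vectors=DatasetDescriptor(namespace="std_t", tablename="deep1M", num_vectors=1_000_000),
    database_vectors_list=[
        DatasetDescriptor(namespace="std_d", tablename="deep1M", num_vectors=1_000_000),
    ],
    query_vectors=DatasetDescriptor(namespace="std_q", tablename="deep1M"),
    min_accuracy=0.85,
)
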
packages/leann-backend-hnsw/third_party/faiss/benchs/bench_fw/utils.py | 248 lines (vendored, new file)
@@ -0,0 +1,248 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.

import functools
import logging
from enum import Enum
from multiprocessing.pool import ThreadPool
from time import perf_counter

import faiss  # @manual=//faiss/python:pyfaiss
import numpy as np

from faiss.contrib.evaluation import (  # @manual=//faiss/contrib:faiss_contrib
    OperatingPoints,
)

logger = logging.getLogger(__name__)


def timer(name, func, once=False):
    # returns (result, seconds per call, number of repeats)
    logger.info(f"Measuring {name}")
    t1 = perf_counter()
    res = func()
    t2 = perf_counter()
    t = t2 - t1
    repeat = 1
    if not once and t < 1.0:
        repeat = int(2.0 // t)
        logger.info(
            f"Time for {name}: {t:.3f} seconds, repeating {repeat} times"
        )
        t1 = perf_counter()
        for _ in range(repeat):
            res = func()
        t2 = perf_counter()
        t = (t2 - t1) / repeat
    logger.info(f"Time for {name}: {t:.3f} seconds")
    return res, t, repeat


def refine_distances_knn(
    xq: np.ndarray,
    xb: np.ndarray,
    I: np.ndarray,
    metric,
):
    """Recompute distances between xq[i] and xb[I[i, :]]"""
    nq, k = I.shape
    xq = np.ascontiguousarray(xq, dtype="float32")
    nq2, d = xq.shape
    xb = np.ascontiguousarray(xb, dtype="float32")
    nb, d2 = xb.shape
    I = np.ascontiguousarray(I, dtype="int64")
    assert nq2 == nq
    assert d2 == d
    D = np.empty(I.shape, dtype="float32")
    D[:] = np.inf
    if metric == faiss.METRIC_L2:
        faiss.fvec_L2sqr_by_idx(
            faiss.swig_ptr(D),
            faiss.swig_ptr(xq),
            faiss.swig_ptr(xb),
            faiss.swig_ptr(I),
            d,
            nq,
            k,
        )
    else:
        faiss.fvec_inner_products_by_idx(
            faiss.swig_ptr(D),
            faiss.swig_ptr(xq),
            faiss.swig_ptr(xb),
            faiss.swig_ptr(I),
            d,
            nq,
            k,
        )
    return D


def refine_distances_range(
    lims: np.ndarray,
    D: np.ndarray,
    I: np.ndarray,
    xq: np.ndarray,
    xb: np.ndarray,
    metric,
):
    with ThreadPool(32) as pool:
        R = pool.map(
            lambda i: (
                np.sum(np.square(xq[i] - xb[I[lims[i] : lims[i + 1]]]), axis=1)
                if metric == faiss.METRIC_L2
                else np.tensordot(
                    xq[i], xb[I[lims[i] : lims[i + 1]]], axes=(0, 1)
                )
            )
            if lims[i + 1] > lims[i]
            else [],
            range(len(lims) - 1),
        )
    return np.hstack(R)


def distance_ratio_measure(I, R, D_GT, metric):
    sum_of_R = np.sum(np.where(I >= 0, R, 0))
    sum_of_D_GT = np.sum(np.where(I >= 0, D_GT, 0))
    if metric == faiss.METRIC_INNER_PRODUCT:
        return (sum_of_R / sum_of_D_GT).item()
    elif metric == faiss.METRIC_L2:
        return (sum_of_D_GT / sum_of_R).item()
    else:
        raise RuntimeError(f"unknown metric {metric}")


@functools.cache
def get_cpu_info():
    return [l for l in open("/proc/cpuinfo", "r") if "model name" in l][0][
        13:
    ].strip()


def dict_merge(target, source):
    for k, v in source.items():
        if isinstance(v, dict) and k in target:
            dict_merge(target[k], v)
        else:
            target[k] = v


class Cost:
    def __init__(self, values):
        self.values = values

    def __le__(self, other):
        return all(
            v1 <= v2 for v1, v2 in zip(self.values, other.values, strict=True)
        )

    def __lt__(self, other):
        return all(
            v1 < v2 for v1, v2 in zip(self.values, other.values, strict=True)
        )


class ParetoMode(Enum):
    DISABLE = 1  # no Pareto filtering
    INDEX = 2  # index-local optima
    GLOBAL = 3  # global optima


class ParetoMetric(Enum):
    TIME = 0  # time vs accuracy
    SPACE = 1  # space vs accuracy
    TIME_SPACE = 2  # (time, space) vs accuracy


def range_search_recall_at_precision(experiment, precision):
    return round(
        max(
            r
            for r, p in zip(
                experiment["range_search_pr"]["recall"],
                experiment["range_search_pr"]["precision"],
            )
            if p > precision
        ),
        6,
    )


def filter_results(
    results,
    evaluation,
    accuracy_metric,  # str or func
    time_metric=None,  # func or None -> use default
    space_metric=None,  # func or None -> use default
    min_accuracy=0,
    max_space=0,
    max_time=0,
    scaling_factor=1.0,
    name_filter=None,  # func
    pareto_mode=ParetoMode.DISABLE,
    pareto_metric=ParetoMetric.TIME,
):
    if isinstance(accuracy_metric, str):
        accuracy_key = accuracy_metric
        accuracy_metric = lambda v: v[accuracy_key]

    if time_metric is None:
        time_metric = lambda v: v["time"] * scaling_factor + (
            v["quantizer"]["time"] if "quantizer" in v else 0
        )

    if space_metric is None:
        space_metric = lambda v: results["indices"][v["codec"]]["code_size"]

    fe = []
    ops = {}
    if pareto_mode == ParetoMode.GLOBAL:
        op = OperatingPoints()
        ops["global"] = op
    for k, v in results["experiments"].items():
        if f".{evaluation}" in k:
            accuracy = accuracy_metric(v)
            if min_accuracy > 0 and accuracy < min_accuracy:
                continue
            space = space_metric(v)
            if space is None:
                space = 0
            if max_space > 0 and space > max_space:
                continue
            time = time_metric(v)
            if max_time > 0 and time > max_time:
                continue
            idx_name = v["index"] + (
                "snap"
                if "search_params" in v and v["search_params"]["snap"] == 1
                else ""
            )
            if name_filter is not None and not name_filter(idx_name):
                continue
            experiment = (accuracy, space, time, k, v)
            if pareto_mode == ParetoMode.DISABLE:
                fe.append(experiment)
                continue
            if pareto_mode == ParetoMode.INDEX:
                if idx_name not in ops:
                    ops[idx_name] = OperatingPoints()
                op = ops[idx_name]
            if pareto_metric == ParetoMetric.TIME:
                op.add_operating_point(experiment, accuracy, time)
            elif pareto_metric == ParetoMetric.SPACE:
                op.add_operating_point(experiment, accuracy, space)
            else:
                op.add_operating_point(
                    experiment, accuracy, Cost([time, space])
                )

    if ops:
        for op in ops.values():
            for v, _, _ in op.operating_points:
                fe.append(v)

    fe.sort()
    return fe
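
A small sketch of the helpers above closes out the file list. It is not part of the commit; the inputs are made up and the `bench_fw` import path is an assumption.

# Helper sketch (inputs are illustrative).
import numpy as np
from bench_fw.utils import Cost, dict_merge, timer  # assumed import path

# timer() returns the last result, the per-call time, and how many repeats it ran.
res, seconds, repeats = timer("matmul", lambda: np.ones((256, 256)) @ np.ones((256, 256)))

# dict_merge() merges nested dicts in place, with the source winning on conflicts.
params = {"nprobe": 16, "quantizer": {"efSearch": 64}}
dict_merge(params, {"quantizer": {"efSearch": 128}, "ht": 256})
# params == {"nprobe": 16, "quantizer": {"efSearch": 128}, "ht": 256}

# Cost compares (time, space) pairs component-wise, which is what the
# TIME_SPACE Pareto filter in filter_results relies on.
assert Cost([1.0, 100]) <= Cost([2.0, 100])
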