Add a progress bar to embedding computation during index build
This commit is contained in:
@@ -224,7 +224,7 @@ async def query_leann_index(index_path: str, query: str):
|
|||||||
query,
|
query,
|
||||||
top_k=20,
|
top_k=20,
|
||||||
recompute_beighbor_embeddings=True,
|
recompute_beighbor_embeddings=True,
|
||||||
complexity=128,
|
complexity=32,
|
||||||
beam_width=1,
|
beam_width=1,
|
||||||
llm_config={
|
llm_config={
|
||||||
"type": "openai",
|
"type": "openai",
|
||||||
@@ -252,7 +252,7 @@ async def main():
|
|||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--index-dir",
|
"--index-dir",
|
||||||
type=str,
|
type=str,
|
||||||
default="./wechat_history_june19_test",
|
default="./wechat_history_magic_test",
|
||||||
help="Directory to store the LEANN index (default: ./wechat_history_index_leann_test)",
|
help="Directory to store the LEANN index (default: ./wechat_history_index_leann_test)",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
Submodule packages/leann-backend-hnsw/third_party/faiss updated: ff22e2c86b...2547df4377
@@ -20,6 +20,7 @@ def compute_embeddings(
|
|||||||
mode: str = "sentence-transformers",
|
mode: str = "sentence-transformers",
|
||||||
use_server: bool = True,
|
use_server: bool = True,
|
||||||
port: Optional[int] = None,
|
port: Optional[int] = None,
|
||||||
|
is_build=False,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Computes embeddings using different backends.
|
Computes embeddings using different backends.
|
||||||
@@ -51,6 +52,7 @@ def compute_embeddings(
|
|||||||
chunks,
|
chunks,
|
||||||
model_name,
|
model_name,
|
||||||
mode=mode,
|
mode=mode,
|
||||||
|
is_build=is_build,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -209,6 +211,7 @@ class LeannBuilder:
|
|||||||
self.embedding_model,
|
self.embedding_model,
|
||||||
self.embedding_mode,
|
self.embedding_mode,
|
||||||
use_server=False,
|
use_server=False,
|
||||||
|
is_build=True,
|
||||||
)
|
)
|
||||||
string_ids = [chunk["id"] for chunk in self.chunks]
|
string_ids = [chunk["id"] for chunk in self.chunks]
|
||||||
current_backend_kwargs = {**self.backend_kwargs, "dimensions": self.dimensions}
|
current_backend_kwargs = {**self.backend_kwargs, "dimensions": self.dimensions}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
|
|||||||
|
|
||||||
|
|
||||||
def compute_embeddings(
|
def compute_embeddings(
|
||||||
texts: List[str], model_name: str, mode: str = "sentence-transformers"
|
texts: List[str], model_name: str, mode: str = "sentence-transformers",is_build: bool = False
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Unified embedding computation entry point
|
Unified embedding computation entry point
|
||||||
@@ -27,7 +27,7 @@ def compute_embeddings(
|
|||||||
Normalized embeddings array, shape: (len(texts), embedding_dim)
|
Normalized embeddings array, shape: (len(texts), embedding_dim)
|
||||||
"""
|
"""
|
||||||
if mode == "sentence-transformers":
|
if mode == "sentence-transformers":
|
||||||
return compute_embeddings_sentence_transformers(texts, model_name)
|
return compute_embeddings_sentence_transformers(texts, model_name, is_build=is_build)
|
||||||
elif mode == "openai":
|
elif mode == "openai":
|
||||||
return compute_embeddings_openai(texts, model_name)
|
return compute_embeddings_openai(texts, model_name)
|
||||||
elif mode == "mlx":
|
elif mode == "mlx":
|
||||||
@@ -42,6 +42,7 @@ def compute_embeddings_sentence_transformers(
|
|||||||
use_fp16: bool = True,
|
use_fp16: bool = True,
|
||||||
device: str = "auto",
|
device: str = "auto",
|
||||||
batch_size: int = 32,
|
batch_size: int = 32,
|
||||||
|
is_build: bool = False,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Compute embeddings using SentenceTransformer
|
Compute embeddings using SentenceTransformer
|
||||||
@@ -133,7 +134,7 @@ def compute_embeddings_sentence_transformers(
|
|||||||
embeddings = model.encode(
|
embeddings = model.encode(
|
||||||
texts,
|
texts,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
show_progress_bar=False, # Don't show progress bar in server environment
|
show_progress_bar=is_build, # Show the progress bar only during index build; keep it off in the server environment
|
||||||
convert_to_numpy=True,
|
convert_to_numpy=True,
|
||||||
normalize_embeddings=False, # Keep consistent with original API behavior
|
normalize_embeddings=False, # Keep consistent with original API behavior
|
||||||
device=device,
|
device=device,
|
||||||
|
|||||||
Reference in New Issue
Block a user