From 530f6e4af520aa35da1a5e00f19979a167918fb2 Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Mon, 21 Jul 2025 20:55:18 -0700 Subject: [PATCH] add progress bar in build --- examples/wechat_history_reader_leann.py | 4 ++-- packages/leann-backend-hnsw/third_party/faiss | 2 +- packages/leann-core/src/leann/api.py | 3 +++ packages/leann-core/src/leann/embedding_compute.py | 7 ++++--- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/wechat_history_reader_leann.py b/examples/wechat_history_reader_leann.py index e15e876..49e04a1 100644 --- a/examples/wechat_history_reader_leann.py +++ b/examples/wechat_history_reader_leann.py @@ -224,7 +224,7 @@ async def query_leann_index(index_path: str, query: str): query, top_k=20, recompute_beighbor_embeddings=True, - complexity=128, + complexity=32, beam_width=1, llm_config={ "type": "openai", @@ -252,7 +252,7 @@ async def main(): parser.add_argument( "--index-dir", type=str, - default="./wechat_history_june19_test", + default="./wechat_history_magic_test", help="Directory to store the LEANN index (default: ./wechat_history_index_leann_test)", ) parser.add_argument( diff --git a/packages/leann-backend-hnsw/third_party/faiss b/packages/leann-backend-hnsw/third_party/faiss index ff22e2c..2547df4 160000 --- a/packages/leann-backend-hnsw/third_party/faiss +++ b/packages/leann-backend-hnsw/third_party/faiss @@ -1 +1 @@ -Subproject commit ff22e2c86be1784c760265abe146b1ab0db90ebe +Subproject commit 2547df4377ae097e2eabc9b019c15135b1fea2b4 diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py index 6f4c536..75a6d2b 100644 --- a/packages/leann-core/src/leann/api.py +++ b/packages/leann-core/src/leann/api.py @@ -20,6 +20,7 @@ def compute_embeddings( mode: str = "sentence-transformers", use_server: bool = True, port: Optional[int] = None, + is_build=False, ) -> np.ndarray: """ Computes embeddings using different backends. 
@@ -51,6 +52,7 @@ def compute_embeddings( chunks, model_name, mode=mode, + is_build=is_build, ) @@ -209,6 +211,7 @@ class LeannBuilder: self.embedding_model, self.embedding_mode, use_server=False, + is_build=True, ) string_ids = [chunk["id"] for chunk in self.chunks] current_backend_kwargs = {**self.backend_kwargs, "dimensions": self.dimensions} diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py index 20cef9f..c41f009 100644 --- a/packages/leann-core/src/leann/embedding_compute.py +++ b/packages/leann-core/src/leann/embedding_compute.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) def compute_embeddings( - texts: List[str], model_name: str, mode: str = "sentence-transformers" + texts: List[str], model_name: str, mode: str = "sentence-transformers",is_build: bool = False ) -> np.ndarray: """ Unified embedding computation entry point @@ -27,7 +27,7 @@ def compute_embeddings( Normalized embeddings array, shape: (len(texts), embedding_dim) """ if mode == "sentence-transformers": - return compute_embeddings_sentence_transformers(texts, model_name) + return compute_embeddings_sentence_transformers(texts, model_name, is_build=is_build) elif mode == "openai": return compute_embeddings_openai(texts, model_name) elif mode == "mlx": @@ -42,6 +42,7 @@ def compute_embeddings_sentence_transformers( use_fp16: bool = True, device: str = "auto", batch_size: int = 32, + is_build: bool = False, ) -> np.ndarray: """ Compute embeddings using SentenceTransformer @@ -133,7 +134,7 @@ def compute_embeddings_sentence_transformers( embeddings = model.encode( texts, batch_size=batch_size, - show_progress_bar=False, # Don't show progress bar in server environment + show_progress_bar=is_build, # Show progress bar only during index build; keep it off in server environment convert_to_numpy=True, normalize_embeddings=False, # Keep consistent with original API behavior device=device,