Add a progress bar during index build

This commit is contained in:
yichuan520030910320
2025-07-21 20:55:18 -07:00
parent 2f224f5793
commit 530f6e4af5
4 changed files with 10 additions and 6 deletions

View File

@@ -224,7 +224,7 @@ async def query_leann_index(index_path: str, query: str):
query,
top_k=20,
recompute_beighbor_embeddings=True,
complexity=128,
complexity=32,
beam_width=1,
llm_config={
"type": "openai",
@@ -252,7 +252,7 @@ async def main():
parser.add_argument(
"--index-dir",
type=str,
default="./wechat_history_june19_test",
default="./wechat_history_magic_test",
help="Directory to store the LEANN index (default: ./wechat_history_index_leann_test)",
)
parser.add_argument(

View File

@@ -20,6 +20,7 @@ def compute_embeddings(
mode: str = "sentence-transformers",
use_server: bool = True,
port: Optional[int] = None,
is_build=False,
) -> np.ndarray:
"""
Computes embeddings using different backends.
@@ -51,6 +52,7 @@ def compute_embeddings(
chunks,
model_name,
mode=mode,
is_build=is_build,
)
@@ -209,6 +211,7 @@ class LeannBuilder:
self.embedding_model,
self.embedding_mode,
use_server=False,
is_build=True,
)
string_ids = [chunk["id"] for chunk in self.chunks]
current_backend_kwargs = {**self.backend_kwargs, "dimensions": self.dimensions}

View File

@@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
def compute_embeddings(
texts: List[str], model_name: str, mode: str = "sentence-transformers"
texts: List[str], model_name: str, mode: str = "sentence-transformers",is_build: bool = False
) -> np.ndarray:
"""
Unified embedding computation entry point
@@ -27,7 +27,7 @@ def compute_embeddings(
Normalized embeddings array, shape: (len(texts), embedding_dim)
"""
if mode == "sentence-transformers":
return compute_embeddings_sentence_transformers(texts, model_name)
return compute_embeddings_sentence_transformers(texts, model_name, is_build=is_build)
elif mode == "openai":
return compute_embeddings_openai(texts, model_name)
elif mode == "mlx":
@@ -42,6 +42,7 @@ def compute_embeddings_sentence_transformers(
use_fp16: bool = True,
device: str = "auto",
batch_size: int = 32,
is_build: bool = False,
) -> np.ndarray:
"""
Compute embeddings using SentenceTransformer
@@ -133,7 +134,7 @@ def compute_embeddings_sentence_transformers(
embeddings = model.encode(
texts,
batch_size=batch_size,
show_progress_bar=False, # Don't show progress bar in server environment
show_progress_bar=is_build, # Show the progress bar only when is_build is True (index build); keep it off otherwise, e.g. in the server environment
convert_to_numpy=True,
normalize_embeddings=False, # Keep consistent with original API behavior
device=device,