From c2f35c8e73dd0e7c6072b508cfc593967459683f Mon Sep 17 00:00:00 2001
From: Andy Lee
Date: Mon, 21 Jul 2025 23:02:13 -0700
Subject: [PATCH] fix: configure loggers explicitly and replace print with logging

---
 .../hnsw_embedding_server.py                  | 16 +++++--
 .../leann-core/src/leann/embedding_compute.py | 42 +++++++++----------
 2 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py
index 94b6529..07e74c4 100644
--- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py
+++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py
@@ -17,12 +17,20 @@ import logging
 
 # Set up logging based on environment variable
 LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
-logging.basicConfig(
-    level=getattr(logging, LOG_LEVEL, logging.INFO),
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
 logger = logging.getLogger(__name__)
 
+# Force set logger level (don't rely on basicConfig in subprocess)
+log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
+logger.setLevel(log_level)
+
+# Ensure we have a handler if none exists
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    logger.propagate = False
+
 
 def create_hnsw_embedding_server(
     passages_file: Optional[str] = None,
diff --git a/packages/leann-core/src/leann/embedding_compute.py b/packages/leann-core/src/leann/embedding_compute.py
index 38dde43..d0df463 100644
--- a/packages/leann-core/src/leann/embedding_compute.py
+++ b/packages/leann-core/src/leann/embedding_compute.py
@@ -55,8 +55,8 @@ def compute_embeddings_sentence_transformers(
     """
     Compute embeddings using SentenceTransformer with model caching
     """
-    print(
-        f"INFO: Computing embeddings for {len(texts)} texts using SentenceTransformer, model: '{model_name}'"
+    logger.info(
+        f"Computing embeddings for {len(texts)} texts using SentenceTransformer, model: '{model_name}'"
     )
 
     # Auto-detect device
@@ -73,13 +73,13 @@
 
     # Check if model is already cached
     if cache_key in _model_cache:
-        print(f"INFO: Using cached model: {model_name}")
+        logger.info(f"Using cached model: {model_name}")
         model = _model_cache[cache_key]
     else:
-        print(f"INFO: Loading and caching SentenceTransformer model: {model_name}")
+        logger.info(f"Loading and caching SentenceTransformer model: {model_name}")
         from sentence_transformers import SentenceTransformer
 
-        print(f"INFO: Using device: {device}")
+        logger.info(f"Using device: {device}")
 
         # Prepare model and tokenizer optimization parameters
         model_kwargs = {
@@ -104,9 +104,9 @@
                 tokenizer_kwargs=tokenizer_kwargs,
                 local_files_only=True,
             )
-            print("✅ Model loaded successfully! (local + optimized)")
+            logger.info("Model loaded successfully! (local + optimized)")
         except Exception as e:
-            print(f"Local loading failed ({e}), trying network download...")
+            logger.warning(f"Local loading failed ({e}), trying network download...")
             # Fallback to network loading
             model_kwargs["local_files_only"] = False
             tokenizer_kwargs["local_files_only"] = False
@@ -118,23 +118,23 @@
                 tokenizer_kwargs=tokenizer_kwargs,
                 local_files_only=False,
             )
-            print("✅ Model loaded successfully! (network + optimized)")
+            logger.info("Model loaded successfully! (network + optimized)")
 
         # Apply additional optimizations (if supported)
         if use_fp16 and device in ["cuda", "mps"]:
             try:
                 model = model.half()
                 model = torch.compile(model)
-                print(f"✅ Using FP16 precision and compile optimization: {model_name}")
+                logger.info(f"Using FP16 precision and compile optimization: {model_name}")
             except Exception as e:
-                print(f"FP16 or compile optimization failed: {e}")
+                logger.warning(f"FP16 or compile optimization failed: {e}")
 
         # Cache the model
         _model_cache[cache_key] = model
-        print(f"✅ Model cached: {cache_key}")
+        logger.info(f"Model cached: {cache_key}")
 
     # Compute embeddings
-    print("INFO: Starting embedding computation...")
+    logger.info("Starting embedding computation...")
 
     embeddings = model.encode(
         texts,
@@ -178,10 +178,10 @@ def compute_embeddings_openai(texts: List[str], model_name: str) -> np.ndarray:
     else:
         client = openai.OpenAI(api_key=api_key)
         _model_cache[cache_key] = client
-        print("✅ OpenAI client cached")
+        logger.info("OpenAI client cached")
 
-    print(
-        f"INFO: Computing embeddings for {len(texts)} texts using OpenAI API, model: '{model_name}'"
+    logger.info(
+        f"Computing embeddings for {len(texts)} texts using OpenAI API, model: '{model_name}'"
     )
 
     # OpenAI has limits on batch size and input length
@@ -208,7 +208,7 @@
             batch_embeddings = [embedding.embedding for embedding in response.data]
             all_embeddings.extend(batch_embeddings)
         except Exception as e:
-            print(f"ERROR: Batch {i} failed: {e}")
+            logger.error(f"Batch {i} failed: {e}")
             raise
 
     embeddings = np.array(all_embeddings, dtype=np.float32)
@@ -231,20 +231,20 @@
             "MLX or related libraries not available. Install with: uv pip install mlx mlx-lm"
         ) from e
 
-    print(
-        f"INFO: Computing embeddings for {len(chunks)} chunks using MLX model '{model_name}' with batch_size={batch_size}..."
+    logger.info(
+        f"Computing embeddings for {len(chunks)} chunks using MLX model '{model_name}' with batch_size={batch_size}..."
     )
 
     # Cache MLX model and tokenizer
     cache_key = f"mlx_{model_name}"
     if cache_key in _model_cache:
-        print(f"INFO: Using cached MLX model: {model_name}")
+        logger.info(f"Using cached MLX model: {model_name}")
         model, tokenizer = _model_cache[cache_key]
     else:
-        print(f"INFO: Loading and caching MLX model: {model_name}")
+        logger.info(f"Loading and caching MLX model: {model_name}")
         model, tokenizer = load(model_name)
         _model_cache[cache_key] = (model, tokenizer)
-        print(f"✅ MLX model cached: {cache_key}")
+        logger.info(f"MLX model cached: {cache_key}")
 
     # Process chunks in batches with progress bar
     all_embeddings = []
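
Note on the hnsw_embedding_server.py hunk: logging.basicConfig() is a no-op whenever the root logger already has handlers, which is exactly the situation in a spawned embedding-server subprocess once the parent process (or any imported library) has touched logging first. That is why the patch sets the level and handler on the module logger directly instead. Below is a minimal standalone sketch of the failure mode and of the fix pattern; the file name, logger name, and messages are illustrative only and are not taken from the repository.

# logging_demo.py: hypothetical sketch, not part of this patch.
import logging
import os

# Simulate a parent process or imported library that has already
# configured the root logger before our module runs.
logging.basicConfig(level=logging.ERROR)

LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()

# The old approach: this second basicConfig() silently does nothing,
# because the root logger already has a handler.
logging.basicConfig(level=getattr(logging, LOG_LEVEL, logging.INFO))

logger = logging.getLogger("leann_demo")
logger.info("never shown: effective level is still ERROR via the root logger")

# The patch's approach: configure the module logger explicitly.
logger.setLevel(getattr(logging, LOG_LEVEL, logging.WARNING))
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    logger.propagate = False  # keep records from also reaching the root handler

logger.warning("shown: emitted through the logger's own handler")

The propagate = False line matters here: once the module logger owns a StreamHandler, propagated records would otherwise also reach the root logger's handler and every message would be printed twice.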