fix: more logs

Andy Lee
2025-07-21 23:08:45 -07:00
parent 046d457d22
commit ab72a2ab9d
2 changed files with 34 additions and 22 deletions

View File

@@ -14,6 +14,9 @@ from dataclasses import dataclass, field
 from .registry import BACKEND_REGISTRY
 from .interface import LeannBackendFactoryInterface
 from .chat import get_llm
+import logging
+
+logger = logging.getLogger(__name__)
 
 
 def compute_embeddings(
@@ -67,8 +70,8 @@ def compute_embeddings_via_server(
         chunks: List of text chunks to embed
         model_name: Name of the sentence transformer model
     """
-    print(
-        f"INFO: Computing embeddings for {len(chunks)} chunks using SentenceTransformer model '{model_name}' (via embedding server)..."
+    logger.info(
+        f"Computing embeddings for {len(chunks)} chunks using SentenceTransformer model '{model_name}' (via embedding server)..."
     )
     import zmq
     import msgpack
@@ -288,7 +291,7 @@ class LeannBuilder:
                 f"Dimension mismatch: expected {self.dimensions}, got {embedding_dim}"
             )
 
-        print(
+        logger.info(
             f"Building index from precomputed embeddings: {len(ids)} items, {embedding_dim} dimensions"
         )
@@ -296,7 +299,7 @@ class LeannBuilder:
         if len(self.chunks) != len(ids):
             # If no text chunks provided, create placeholder text entries
             if not self.chunks:
-                print("No text chunks provided, creating placeholder entries...")
+                logger.info("No text chunks provided, creating placeholder entries...")
                 for id_val in ids:
                     self.add_text(
                         f"Document {id_val}",
@@ -371,7 +374,9 @@ class LeannBuilder:
         with open(leann_meta_path, "w", encoding="utf-8") as f:
             json.dump(meta_data, f, indent=2)
 
-        print(f"Index built successfully from precomputed embeddings: {index_path}")
+        logger.info(
+            f"Index built successfully from precomputed embeddings: {index_path}"
+        )
 
 
 class LeannSearcher:
@@ -411,10 +416,10 @@ class LeannSearcher:
         expected_zmq_port: int = 5557,
         **kwargs,
     ) -> List[SearchResult]:
-        print("🔍 DEBUG LeannSearcher.search() called:")
-        print(f" Query: '{query}'")
-        print(f" Top_k: {top_k}")
-        print(f" Additional kwargs: {kwargs}")
+        logger.info("🔍 LeannSearcher.search() called:")
+        logger.info(f" Query: '{query}'")
+        logger.info(f" Top_k: {top_k}")
+        logger.info(f" Additional kwargs: {kwargs}")
 
         start_time = time.time()
@@ -432,9 +437,9 @@ class LeannSearcher:
             use_server_if_available=recompute_embeddings,
             zmq_port=zmq_port,
         )
-        print(f" Generated embedding shape: {query_embedding.shape}")
+        logger.info(f" Generated embedding shape: {query_embedding.shape}")
         embedding_time = time.time() - start_time
-        print(f" Embedding time: {embedding_time} seconds")
+        logger.info(f" Embedding time: {embedding_time} seconds")
 
         start_time = time.time()
         results = self.backend_impl.search(
@@ -449,14 +454,14 @@ class LeannSearcher:
             **kwargs,
         )
         search_time = time.time() - start_time
-        print(f" Search time: {search_time} seconds")
-        print(
+        logger.info(f" Search time: {search_time} seconds")
+        logger.info(
             f" Backend returned: labels={len(results.get('labels', [[]])[0])} results"
         )
 
         enriched_results = []
         if "labels" in results and "distances" in results:
-            print(f" Processing {len(results['labels'][0])} passage IDs:")
+            logger.info(f" Processing {len(results['labels'][0])} passage IDs:")
             for i, (string_id, dist) in enumerate(
                 zip(results["labels"][0], results["distances"][0])
             ):
@@ -470,15 +475,15 @@ class LeannSearcher:
                             metadata=passage_data.get("metadata", {}),
                         )
                     )
-                    print(
+                    logger.info(
                         f" {i + 1}. passage_id='{string_id}' -> SUCCESS: {passage_data['text']}..."
                     )
                 except KeyError:
-                    print(
+                    logger.error(
                         f" {i + 1}. passage_id='{string_id}' -> ERROR: Passage not found in PassageManager!"
                     )
 
-        print(f" Final enriched results: {len(enriched_results)} passages")
+        logger.info(f" Final enriched results: {len(enriched_results)} passages")
         return enriched_results
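
With the print() calls replaced by a module-level logger, the search diagnostics in this file are silent by default. A minimal sketch of how an application could surface them; the "leann" logger name and the LeannSearcher arguments are assumptions for illustration, not taken from this commit:

    import logging

    # Attach a console handler and raise verbosity for the LEANN package logger.
    logging.basicConfig(level=logging.WARNING)
    logging.getLogger("leann").setLevel(logging.INFO)  # assumed package logger name

    # Hypothetical usage: each call now emits the "🔍 LeannSearcher.search() called:"
    # header and the timing lines via logger.info instead of print.
    # searcher = LeannSearcher("path/to/index")
    # results = searcher.search("example query", top_k=5)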

View File

@@ -8,8 +8,13 @@ import numpy as np
 import torch
 from typing import List, Dict, Any
 import logging
+import os
 
+# Set up logger with proper level
 logger = logging.getLogger(__name__)
+LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
+log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
+logger.setLevel(log_level)
 
 # Global model cache to avoid repeated loading
 _model_cache: Dict[str, Any] = {}
@@ -125,7 +130,9 @@ def compute_embeddings_sentence_transformers(
         try:
             model = model.half()
             model = torch.compile(model)
-            logger.info(f"Using FP16 precision and compile optimization: {model_name}")
+            logger.info(
+                f"Using FP16 precision and compile optimization: {model_name}"
+            )
         except Exception as e:
             logger.warning(f"FP16 or compile optimization failed: {e}")
@@ -145,8 +152,8 @@ def compute_embeddings_sentence_transformers(
         device=device,
     )
 
-    print(
-        f"INFO: Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}"
+    logger.info(
+        f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}"
     )
 
     # Validate results
@@ -212,8 +219,8 @@ def compute_embeddings_openai(texts: List[str], model_name: str) -> np.ndarray:
             raise
 
     embeddings = np.array(all_embeddings, dtype=np.float32)
-    print(
-        f"INFO: Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}"
+    logger.info(
+        f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}"
     )
 
     return embeddings
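
The second file gates its module logger behind the LEANN_LOG_LEVEL environment variable, defaulting to WARNING, so the embedding progress messages only appear on request. A minimal sketch of that resolution logic in isolation (the same pattern as the diff, not copied verbatim from the module):

    import logging
    import os

    # Same resolution logic as the diff: unset or unknown values fall back to WARNING.
    level_name = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
    level = getattr(logging, level_name, logging.WARNING)

    print(logging.getLevelName(level))  # e.g. "INFO" when LEANN_LOG_LEVEL=INFO, "WARNING" when unset

In practice, running with LEANN_LOG_LEVEL=INFO raises the module logger's level, but the application still needs a handler (for example via logging.basicConfig) for the INFO records to reach the console.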