fix: logs

Replace print() calls in the embedding server with module-level logger calls, with the log level controlled by the LEANN_LOG_LEVEL environment variable.
@@ -17,12 +17,20 @@ import logging
 
 # Set up logging based on environment variable
 LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
-logging.basicConfig(
-    level=getattr(logging, LOG_LEVEL, logging.INFO),
-    format="%(asctime)s - %(levelname)s - %(message)s",
-)
 logger = logging.getLogger(__name__)
 
+# Force set logger level (don't rely on basicConfig in subprocess)
+log_level = getattr(logging, LOG_LEVEL, logging.WARNING)
+logger.setLevel(log_level)
+
+# Ensure we have a handler if none exists
+if not logger.handlers:
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    logger.propagate = False
+
 
 def create_hnsw_embedding_server(
     passages_file: Optional[str] = None,
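For context, the new setup avoids basicConfig() because in a spawned subprocess the root logger may already be configured, which makes basicConfig() a silent no-op. A minimal standalone sketch of the same pattern, assuming the LEANN_LOG_LEVEL convention used here:

import logging
import os

# Resolve the level from the environment; unknown names fall back to WARNING.
LOG_LEVEL = os.getenv("LEANN_LOG_LEVEL", "WARNING").upper()
logger = logging.getLogger(__name__)
logger.setLevel(getattr(logging, LOG_LEVEL, logging.WARNING))

# Attach a handler explicitly instead of relying on basicConfig(),
# which does nothing once the root logger already has handlers.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
    logger.addHandler(handler)
    logger.propagate = False  # avoid duplicate records via the root logger

Running the server with LEANN_LOG_LEVEL=DEBUG then surfaces all logger.info and logger.debug output.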
@@ -55,8 +55,8 @@ def compute_embeddings_sentence_transformers(
     """
     Compute embeddings using SentenceTransformer with model caching
     """
-    print(
-        f"INFO: Computing embeddings for {len(texts)} texts using SentenceTransformer, model: '{model_name}'"
+    logger.info(
+        f"Computing embeddings for {len(texts)} texts using SentenceTransformer, model: '{model_name}'"
     )
 
     # Auto-detect device
@@ -73,13 +73,13 @@ def compute_embeddings_sentence_transformers(
 
     # Check if model is already cached
     if cache_key in _model_cache:
-        print(f"INFO: Using cached model: {model_name}")
+        logger.info(f"Using cached model: {model_name}")
         model = _model_cache[cache_key]
     else:
-        print(f"INFO: Loading and caching SentenceTransformer model: {model_name}")
+        logger.info(f"Loading and caching SentenceTransformer model: {model_name}")
         from sentence_transformers import SentenceTransformer
 
-        print(f"INFO: Using device: {device}")
+        logger.info(f"Using device: {device}")
 
         # Prepare model and tokenizer optimization parameters
         model_kwargs = {
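The branch above is plain memoization keyed on a module-level dict. A minimal sketch of the same idea, reusing the logger configured earlier; get_model and the exact cache_key layout are illustrative rather than taken from this file:

from sentence_transformers import SentenceTransformer

_model_cache: dict = {}

def get_model(model_name: str, device: str) -> SentenceTransformer:
    # Key on everything that affects the loaded weights or their placement.
    cache_key = f"{model_name}_{device}"
    if cache_key in _model_cache:
        logger.info(f"Using cached model: {model_name}")
        return _model_cache[cache_key]
    logger.info(f"Loading and caching SentenceTransformer model: {model_name}")
    model = SentenceTransformer(model_name, device=device)
    _model_cache[cache_key] = model
    return model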
@@ -104,9 +104,9 @@ def compute_embeddings_sentence_transformers(
             tokenizer_kwargs=tokenizer_kwargs,
             local_files_only=True,
         )
-        print("✅ Model loaded successfully! (local + optimized)")
+        logger.info("Model loaded successfully! (local + optimized)")
     except Exception as e:
-        print(f"Local loading failed ({e}), trying network download...")
+        logger.warning(f"Local loading failed ({e}), trying network download...")
         # Fallback to network loading
         model_kwargs["local_files_only"] = False
         tokenizer_kwargs["local_files_only"] = False
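The surrounding try/except implements offline-first loading: try the local Hugging Face cache, and only hit the network when that fails. A hedged sketch of the shape, assuming a sentence-transformers version that accepts local_files_only (as the hunk itself passes it); load_offline_first is a made-up helper name:

from sentence_transformers import SentenceTransformer

def load_offline_first(model_name: str, device: str) -> SentenceTransformer:
    try:
        # Prefer the local cache so startup works without network access.
        model = SentenceTransformer(model_name, device=device, local_files_only=True)
        logger.info("Model loaded successfully! (local + optimized)")
    except Exception as e:
        # Fall back to a normal download when the model is not cached.
        logger.warning(f"Local loading failed ({e}), trying network download...")
        model = SentenceTransformer(model_name, device=device, local_files_only=False)
        logger.info("Model loaded successfully! (network + optimized)")
    return model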
@@ -118,23 +118,23 @@ def compute_embeddings_sentence_transformers(
             tokenizer_kwargs=tokenizer_kwargs,
             local_files_only=False,
         )
-        print("✅ Model loaded successfully! (network + optimized)")
+        logger.info("Model loaded successfully! (network + optimized)")
 
     # Apply additional optimizations (if supported)
     if use_fp16 and device in ["cuda", "mps"]:
         try:
             model = model.half()
             model = torch.compile(model)
-            print(f"✅ Using FP16 precision and compile optimization: {model_name}")
+            logger.info(f"Using FP16 precision and compile optimization: {model_name}")
         except Exception as e:
-            print(f"FP16 or compile optimization failed: {e}")
+            logger.warning(f"FP16 or compile optimization failed: {e}")
 
     # Cache the model
     _model_cache[cache_key] = model
-    print(f"✅ Model cached: {cache_key}")
+    logger.info(f"Model cached: {cache_key}")
 
     # Compute embeddings
-    print("INFO: Starting embedding computation...")
+    logger.info("Starting embedding computation...")
 
     embeddings = model.encode(
         texts,
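Note how the FP16/compile block treats both steps as best-effort: a failed optimization is downgraded to a warning instead of aborting the server. A minimal sketch of that pattern; maybe_optimize is a hypothetical name, and torch.compile requires PyTorch 2.x:

import torch

def maybe_optimize(model, device: str, use_fp16: bool, model_name: str):
    # Optional speedups only; inference must still work if they fail.
    if use_fp16 and device in ["cuda", "mps"]:
        try:
            model = model.half()          # FP16 weights
            model = torch.compile(model)  # PyTorch 2.x graph compilation
            logger.info(f"Using FP16 precision and compile optimization: {model_name}")
        except Exception as e:
            logger.warning(f"FP16 or compile optimization failed: {e}")
    return model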
@@ -178,10 +178,10 @@ def compute_embeddings_openai(texts: List[str], model_name: str) -> np.ndarray:
     else:
         client = openai.OpenAI(api_key=api_key)
         _model_cache[cache_key] = client
-        print("✅ OpenAI client cached")
+        logger.info("OpenAI client cached")
 
-    print(
-        f"INFO: Computing embeddings for {len(texts)} texts using OpenAI API, model: '{model_name}'"
+    logger.info(
+        f"Computing embeddings for {len(texts)} texts using OpenAI API, model: '{model_name}'"
     )
 
     # OpenAI has limits on batch size and input length
@@ -208,7 +208,7 @@ def compute_embeddings_openai(texts: List[str], model_name: str) -> np.ndarray:
             batch_embeddings = [embedding.embedding for embedding in response.data]
             all_embeddings.extend(batch_embeddings)
         except Exception as e:
-            print(f"ERROR: Batch {i} failed: {e}")
+            logger.error(f"Batch {i} failed: {e}")
             raise
 
     embeddings = np.array(all_embeddings, dtype=np.float32)
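The logger.error above sits inside a batch loop that re-raises after logging, so a partial embedding matrix is never returned. A sketch of the loop shape, assuming the OpenAI v1 client and an illustrative batch size of 100; embed_in_batches is a made-up helper name:

import numpy as np

def embed_in_batches(client, texts, model_name: str, batch_size: int = 100) -> np.ndarray:
    all_embeddings = []
    for i in range(0, len(texts), batch_size):
        batch = texts[i : i + batch_size]
        try:
            response = client.embeddings.create(model=model_name, input=batch)
            all_embeddings.extend(item.embedding for item in response.data)
        except Exception as e:
            # Log with batch context, then re-raise rather than return partial data.
            logger.error(f"Batch {i} failed: {e}")
            raise
    return np.array(all_embeddings, dtype=np.float32)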
@@ -231,20 +231,20 @@ def compute_embeddings_mlx(
             "MLX or related libraries not available. Install with: uv pip install mlx mlx-lm"
         ) from e
 
-    print(
-        f"INFO: Computing embeddings for {len(chunks)} chunks using MLX model '{model_name}' with batch_size={batch_size}..."
+    logger.info(
+        f"Computing embeddings for {len(chunks)} chunks using MLX model '{model_name}' with batch_size={batch_size}..."
     )
 
     # Cache MLX model and tokenizer
     cache_key = f"mlx_{model_name}"
     if cache_key in _model_cache:
-        print(f"INFO: Using cached MLX model: {model_name}")
+        logger.info(f"Using cached MLX model: {model_name}")
         model, tokenizer = _model_cache[cache_key]
     else:
-        print(f"INFO: Loading and caching MLX model: {model_name}")
+        logger.info(f"Loading and caching MLX model: {model_name}")
         model, tokenizer = load(model_name)
         _model_cache[cache_key] = (model, tokenizer)
-        print(f"✅ MLX model cached: {cache_key}")
+        logger.info(f"MLX model cached: {cache_key}")
 
     # Process chunks in batches with progress bar
     all_embeddings = []