From 88ca09440ddab5ed6a05e24995f3b404926c0815 Mon Sep 17 00:00:00 2001
From: Fangzhou66
Date: Sat, 12 Jul 2025 16:13:15 -0700
Subject: [PATCH] fix some hf problem

---
 examples/google_history_reader_leann.py | 13 +++++++++----
 packages/leann-core/src/leann/chat.py   | 13 +++++++++++++
 2 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/examples/google_history_reader_leann.py b/examples/google_history_reader_leann.py
index f8acf90..4e92f7d 100644
--- a/examples/google_history_reader_leann.py
+++ b/examples/google_history_reader_leann.py
@@ -1,12 +1,17 @@
 import os
 import asyncio
-import dotenv
+try:
+    import dotenv
+    dotenv.load_dotenv()
+except ModuleNotFoundError:
+    # python-dotenv is not installed; skip loading environment variables
+    dotenv = None
 from pathlib import Path
 from typing import List, Any
 from leann.api import LeannBuilder, LeannSearcher, LeannChat
 from llama_index.core.node_parser import SentenceSplitter
 
-dotenv.load_dotenv()
+# dotenv.load_dotenv()  # handled above if python-dotenv is available
 
 def create_leann_index_from_multiple_chrome_profiles(profile_dirs: List[Path], index_path: str = "chrome_history_index.leann", max_count: int = -1):
     """
@@ -190,7 +195,7 @@ async def query_leann_index(index_path: str, query: str):
         query: The query string
     """
     print(f"\n[PHASE 2] Starting Leann chat session...")
-    chat = LeannChat(index_path=index_path)
+    chat = LeannChat(index_path=index_path, llm_config={"type": "hf", "model": "Qwen/Qwen3-0.6B"})
 
     print(f"You: {query}")
     chat_response = chat.ask(
@@ -227,7 +232,7 @@ async def main():
         return
 
     # Create or load the LEANN index from all sources
-    index_path = create_leann_index_from_multiple_chrome_profiles(profile_dirs, INDEX_PATH)
+    index_path = create_leann_index_from_multiple_chrome_profiles(profile_dirs, INDEX_PATH, 1000)
 
     if index_path:
         # Example queries
diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py
index 47b6e41..62d68a5 100644
--- a/packages/leann-core/src/leann/chat.py
+++ b/packages/leann-core/src/leann/chat.py
@@ -128,6 +128,19 @@ class HFChat(LLMInterface):
         self.pipeline = pipeline("text-generation", model=model_name, device=device)
 
     def ask(self, prompt: str, **kwargs) -> str:
+        # Map OpenAI-style arguments to Hugging Face equivalents
+        if "max_tokens" in kwargs:
+            # Prefer user-provided max_new_tokens if both are present
+            kwargs.setdefault("max_new_tokens", kwargs["max_tokens"])
+            # Remove the unsupported key to avoid errors in Transformers
+            kwargs.pop("max_tokens")
+
+        # Handle temperature=0 edge-case for greedy decoding
+        if "temperature" in kwargs and kwargs["temperature"] == 0.0:
+            # Remove unsupported zero temperature and use deterministic generation
+            kwargs.pop("temperature")
+            kwargs.setdefault("do_sample", False)
+
         # Sensible defaults for text generation
         params = {
             "max_length": 500,
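
A minimal standalone sketch of the argument translation that HFChat.ask now performs, useful for sanity-checking the mapping outside the Transformers pipeline; the helper name _map_openai_kwargs is hypothetical, but the rules mirror the patch:

# Sketch of the OpenAI-to-Transformers argument translation added above.
# `_map_openai_kwargs` is a hypothetical helper, not part of the patch.
def _map_openai_kwargs(**kwargs):
    # max_tokens (OpenAI style) -> max_new_tokens (Transformers), keeping an
    # explicit max_new_tokens if the caller already supplied one
    if "max_tokens" in kwargs:
        kwargs.setdefault("max_new_tokens", kwargs["max_tokens"])
        kwargs.pop("max_tokens")

    # temperature=0 is not accepted for sampling, so drop it and fall back to
    # deterministic (greedy) decoding
    if kwargs.get("temperature") == 0.0:
        kwargs.pop("temperature")
        kwargs.setdefault("do_sample", False)

    return kwargs

print(_map_openai_kwargs(max_tokens=128, temperature=0.0))
# -> {'max_new_tokens': 128, 'do_sample': False}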
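
A hedged usage sketch of the new llm_config argument, assuming an index has already been built at the example's default path and that LeannChat.ask accepts the query text directly, as the example script does:

# Usage sketch; the index path and query string below are illustrative.
from leann.api import LeannChat

chat = LeannChat(
    index_path="chrome_history_index.leann",
    llm_config={"type": "hf", "model": "Qwen/Qwen3-0.6B"},  # local Hugging Face backend
)
response = chat.ask("Which sites did I visit most often last week?")
print(response)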