fix some HF problems

Fangzhou66
2025-07-12 16:13:15 -07:00
parent 8e0ab4a28d
commit 88ca09440d
2 changed files with 22 additions and 4 deletions

View File

@@ -1,12 +1,17 @@
 import os
 import asyncio
-import dotenv
+try:
+    import dotenv
+    dotenv.load_dotenv()
+except ModuleNotFoundError:
+    # python-dotenv is not installed; skip loading environment variables
+    dotenv = None
 from pathlib import Path
 from typing import List, Any
 from leann.api import LeannBuilder, LeannSearcher, LeannChat
 from llama_index.core.node_parser import SentenceSplitter
 
-dotenv.load_dotenv()
+# dotenv.load_dotenv()  # handled above if python-dotenv is available
 
 def create_leann_index_from_multiple_chrome_profiles(profile_dirs: List[Path], index_path: str = "chrome_history_index.leann", max_count: int = -1):
     """
@@ -190,7 +195,7 @@ async def query_leann_index(index_path: str, query: str):
         query: The query string
     """
 
     print(f"\n[PHASE 2] Starting Leann chat session...")
-    chat = LeannChat(index_path=index_path)
+    chat = LeannChat(index_path=index_path, llm_config={"type": "hf", "model": "Qwen/Qwen3-0.6B"})
     print(f"You: {query}")
     chat_response = chat.ask(
@@ -227,7 +232,7 @@ async def main():
         return
 
     # Create or load the LEANN index from all sources
-    index_path = create_leann_index_from_multiple_chrome_profiles(profile_dirs, INDEX_PATH)
+    index_path = create_leann_index_from_multiple_chrome_profiles(profile_dirs, INDEX_PATH, 1000)
 
     if index_path:
         # Example queries
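
For context, a minimal usage sketch of the Hugging Face chat path this file now wires up. The index path and query string below are placeholders; only the LeannChat(index_path=..., llm_config=...) constructor and chat.ask(...) come from the diff above.

    from leann.api import LeannChat

    # Hypothetical example: the query is illustrative, not taken from this commit.
    chat = LeannChat(
        index_path="chrome_history_index.leann",
        llm_config={"type": "hf", "model": "Qwen/Qwen3-0.6B"},
    )
    answer = chat.ask("Which sites did I visit most often last week?")
    print(answer)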

View File

@@ -128,6 +128,19 @@ class HFChat(LLMInterface):
         self.pipeline = pipeline("text-generation", model=model_name, device=device)
 
     def ask(self, prompt: str, **kwargs) -> str:
+        # Map OpenAI-style arguments to Hugging Face equivalents
+        if "max_tokens" in kwargs:
+            # Prefer user-provided max_new_tokens if both are present
+            kwargs.setdefault("max_new_tokens", kwargs["max_tokens"])
+            # Remove the unsupported key to avoid errors in Transformers
+            kwargs.pop("max_tokens")
+
+        # Handle the temperature=0 edge case for greedy decoding
+        if "temperature" in kwargs and kwargs["temperature"] == 0.0:
+            # Remove the unsupported zero temperature and use deterministic generation
+            kwargs.pop("temperature")
+            kwargs.setdefault("do_sample", False)
+
         # Sensible defaults for text generation
         params = {
             "max_length": 500,