perf: reuse embedding server for query embed

This commit is contained in:
Andy Lee
2025-07-16 16:12:15 -07:00
parent 2a1a152073
commit f77c4e38cb
4 changed files with 169 additions and 38 deletions


@@ -1,7 +1,3 @@
-import faulthandler
-faulthandler.enable()
 import argparse
 from llama_index.core import SimpleDirectoryReader, Settings
 from llama_index.core.node_parser import SentenceSplitter
@@ -62,7 +58,7 @@ async def main(args):
 print(f"\n[PHASE 2] Starting Leann chat session...")
-llm_config = {"type": "hf", "model": "Qwen/Qwen3-8B"}
+llm_config = {"type": "hf", "model": "Qwen/Qwen3-4B"}
 chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
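
For context on the pattern named in the commit title: "reusing the embedding server" generally means keeping one long-lived process (or in-process model) that is loaded once for index building and then reused to embed incoming queries, rather than loading the embedding model again on every query. The sketch below is only an illustration of that pattern under those assumptions; `EmbeddingServer`, `get_embedding_server`, and the model name are hypothetical and are not LEANN's actual API.

```python
from typing import Callable, List, Optional


class EmbeddingServer:
    """Hypothetical long-lived embedding server: loads the model once and
    serves every subsequent embedding request, including query embedding."""

    def __init__(self, model_name: str) -> None:
        self.model_name = model_name
        self._model: Optional[Callable[[str], List[float]]] = None

    def _ensure_loaded(self) -> Callable[[str], List[float]]:
        # Loading the model is the expensive step; do it at most once.
        if self._model is None:
            # Stand-in for a real model load (e.g. an HF embedding model).
            self._model = lambda text: [float(len(text))]
        return self._model

    def embed(self, text: str) -> List[float]:
        return self._ensure_loaded()(text)


# Module-level singleton so the query path reuses the server that was
# already started for index building instead of spawning a second one.
_SERVER: Optional[EmbeddingServer] = None


def get_embedding_server(model_name: str = "example-embedding-model") -> EmbeddingServer:
    global _SERVER
    if _SERVER is None:
        _SERVER = EmbeddingServer(model_name)
    return _SERVER


if __name__ == "__main__":
    server = get_embedding_server()
    # Both calls hit the same already-loaded server.
    print(server.embed("build the index"))
    print(server.embed("embed this query"))
```

The design choice is simply to pay the model-load cost once and amortize it across index building and all query embeddings, which is where the performance gain in a change like this typically comes from.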