perf: reuse embedding server for query embed

This commit is contained in:
Andy Lee
2025-07-16 16:12:15 -07:00
parent 2a1a152073
commit f77c4e38cb
4 changed files with 169 additions and 38 deletions


@@ -1,7 +1,3 @@
-import faulthandler
-faulthandler.enable()
 import argparse
 from llama_index.core import SimpleDirectoryReader, Settings
 from llama_index.core.node_parser import SentenceSplitter
@@ -62,7 +58,7 @@ async def main(args):
 print(f"\n[PHASE 2] Starting Leann chat session...")
-llm_config = {"type": "hf", "model": "Qwen/Qwen3-8B"}
+llm_config = {"type": "hf", "model": "Qwen/Qwen3-4B"}
 chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
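
For context on the pattern named in the commit title: "reusing the embedding server" generally means keeping one long-lived process (or in-process model) that is loaded once for index building and then reused to embed incoming queries, rather than loading the embedding model again on every query. The sketch below is only an illustration of that pattern under those assumptions; `EmbeddingServer`, `get_embedding_server`, and the model name are hypothetical and are not LEANN's actual API.

```python
from typing import Callable, List, Optional


class EmbeddingServer:
    """Hypothetical long-lived embedding server: loads the model once and
    serves every subsequent embedding request, including query embedding."""

    def __init__(self, model_name: str) -> None:
        self.model_name = model_name
        self._model: Optional[Callable[[str], List[float]]] = None

    def _ensure_loaded(self) -> Callable[[str], List[float]]:
        # Loading the model is the expensive step; do it at most once.
        if self._model is None:
            # Stand-in for a real model load (e.g. an HF embedding model).
            self._model = lambda text: [float(len(text))]
        return self._model

    def embed(self, text: str) -> List[float]:
        return self._ensure_loaded()(text)


# Module-level singleton so the query path reuses the server that was
# already started for index building instead of spawning a second one.
_SERVER: Optional[EmbeddingServer] = None


def get_embedding_server(model_name: str = "example-embedding-model") -> EmbeddingServer:
    global _SERVER
    if _SERVER is None:
        _SERVER = EmbeddingServer(model_name)
    return _SERVER


if __name__ == "__main__":
    server = get_embedding_server()
    # Both calls hit the same already-loaded server.
    print(server.embed("build the index"))
    print(server.embed("embed this query"))
```

The design choice is simply to pay the model-load cost once and amortize it across index building and all query embeddings, which is where the performance gain in a change like this typically comes from.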