perf: reuse embedding server for query embed
@@ -1,7 +1,3 @@
-import faulthandler
-
-faulthandler.enable()
-
 import argparse
 from llama_index.core import SimpleDirectoryReader, Settings
 from llama_index.core.node_parser import SentenceSplitter
@@ -62,7 +58,7 @@ async def main(args):
 
     print(f"\n[PHASE 2] Starting Leann chat session...")
 
-    llm_config = {"type": "hf", "model": "Qwen/Qwen3-8B"}
+    llm_config = {"type": "hf", "model": "Qwen/Qwen3-4B"}
 
     chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
 
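For readers looking at this hunk out of context, below is a minimal sketch of the chat setup as it stands after the change. The import path and the INDEX_PATH value are assumptions for illustration only; the diff shows just the call site inside main().

import asyncio

# Hypothetical import path; the diff only shows LeannChat being called.
from leann import LeannChat

INDEX_PATH = "./demo.leann"  # placeholder; the real value is defined elsewhere in the example script


async def main():
    print("\n[PHASE 2] Starting Leann chat session...")

    # This commit switches from Qwen/Qwen3-8B to the smaller Qwen/Qwen3-4B HF model.
    llm_config = {"type": "hf", "model": "Qwen/Qwen3-4B"}
    chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
    # ... the rest of the chat session (queries, cleanup) is omitted here.


if __name__ == "__main__":
    asyncio.run(main())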