Datastore reproduce (#3)

* fix: diskann zmq port and passages
* feat: auto discovery of packages and fix passage gen for diskann
* docs: embedding pruning
* refactor: passage structure
* feat: reproducible research datasets, rpj_wiki & dpr
* refactor: chat and base searcher
* feat: chat on mps
2025-07-11 23:37:23 -07:00
parent 91a026f38b
commit eb6f504789
22 changed files with 5070 additions and 3681 deletions
--- a/examples/main_cli_example.py
+++ b/examples/main_cli_example.py
@@ -1,6 +1,7 @@
 import faulthandler
 faulthandler.enable()

+import argparse
 from llama_index.core import SimpleDirectoryReader, Settings
 from llama_index.core.readers.base import BaseReader
 from llama_index.node_parser.docling import DoclingNodeParser
@@ -69,17 +70,30 @@ if not INDEX_DIR.exists():
 else:
    print(f"--- Using existing index at {INDEX_DIR} ---")

-async def main():
+async def main(args):
    print(f"\n[PHASE 2] Starting Leann chat session...")
-    chat = LeannChat(index_path=INDEX_PATH)
+    
+    llm_config = {
+        "type": args.llm,
+        "model": args.model,
+        "host": args.host
+    }
+
+    chat = LeannChat(index_path=INDEX_PATH, llm_config=llm_config)
    
    query = "Based on the paper, what are the main techniques LEANN explores to reduce the storage overhead and DLPM explore to achieve Fairness and Efiiciency trade-off?"
    query = "What is the main idea of RL and give me 5 exapmle of classic RL algorithms?"
    query = "什么是盘古大模型以及盘古开发过程中遇到了什么阴暗面，任务令一般在什么城市颁发"

    print(f"You: {query}")
-    chat_response = chat.ask(query, top_k=20, recompute_beighbor_embeddings=True,complexity=32,beam_width=1)
+    chat_response = chat.ask(query, top_k=20, recompute_beighbor_embeddings=True, complexity=32)
    print(f"Leann: {chat_response}")

 if __name__ == "__main__":
-    asyncio.run(main())
+    parser = argparse.ArgumentParser(description="Run Leann Chat with various LLM backends.")
+    parser.add_argument("--llm", type=str, default="hf", choices=["simulated", "ollama", "hf", "openai"], help="The LLM backend to use.")
+    parser.add_argument("--model", type=str, default='meta-llama/Llama-3.2-3B-Instruct', help="The model name to use (e.g., 'llama3:8b' for ollama, 'deepseek-ai/deepseek-llm-7b-chat' for hf, 'gpt-4o' for openai).")
+    parser.add_argument("--host", type=str, default="http://localhost:11434", help="The host for the Ollama API.")
+    args = parser.parse_args()
+
+    asyncio.run(main(args))