From 31fd3c816aa0cd79975d58c64eb0f58a6c73a7a0 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Sat, 2 Aug 2025 18:53:55 -0700 Subject: [PATCH] fix: update default embedding models for better performance - Change WeChat, Browser, and Email RAG examples to use all-MiniLM-L6-v2 - Previous Qwen/Qwen3-Embedding-0.6B was too slow for these use cases - all-MiniLM-L6-v2 is a fast 384-dim model, ideal for large-scale personal data --- examples/browser_rag.py | 5 +++++ examples/email_rag.py | 5 +++++ examples/wechat_rag.py | 4 +++- 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/browser_rag.py b/examples/browser_rag.py index 6003bd1..8e9f9bb 100644 --- a/examples/browser_rag.py +++ b/examples/browser_rag.py @@ -18,6 +18,11 @@ class BrowserRAG(BaseRAGExample): """RAG example for Chrome browser history.""" def __init__(self): + # Set default values BEFORE calling super().__init__ + self.embedding_model_default = ( + "sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model + ) + super().__init__( name="Browser History", description="Process and query Chrome browser history with LEANN", diff --git a/examples/email_rag.py b/examples/email_rag.py index 36fdc3f..b3c5483 100644 --- a/examples/email_rag.py +++ b/examples/email_rag.py @@ -17,6 +17,11 @@ class EmailRAG(BaseRAGExample): """RAG example for Apple Mail processing.""" def __init__(self): + # Set default values BEFORE calling super().__init__ + self.embedding_model_default = ( + "sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model + ) + super().__init__( name="Email", description="Process and query Apple Mail emails with LEANN", diff --git a/examples/wechat_rag.py b/examples/wechat_rag.py index aa8b987..a071f89 100644 --- a/examples/wechat_rag.py +++ b/examples/wechat_rag.py @@ -20,7 +20,9 @@ class WeChatRAG(BaseRAGExample): def __init__(self): # Set default values BEFORE calling super().__init__ self.max_items_default = 50 # Match original default - self.embedding_model_default = "Qwen/Qwen3-Embedding-0.6B" # Match original default + self.embedding_model_default = ( + "sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model + ) super().__init__( name="WeChat History",