From 85277ba67a65ba926aa50331c1145d617e907049 Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Sun, 3 Aug 2025 23:02:06 -0700 Subject: [PATCH] fix wechat --- README.md | 2 +- examples/email_rag.py | 1 + examples/wechat_rag.py | 3 ++- packages/leann-core/src/leann/api.py | 3 ++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2150613..d04620d 100755 --- a/README.md +++ b/README.md @@ -192,7 +192,7 @@ All RAG examples share these common parameters. **Interactive mode** is availabl # Search Parameters --top-k N # Number of results to retrieve (default: 20) ---search-complexity N # Search complexity for graph traversal (default: 64) +--search-complexity N # Search complexity for graph traversal (default: 32) # Chunking Parameters --chunk-size N # Size of text chunks (default varies by source: 256 for most, 192 for WeChat) diff --git a/examples/email_rag.py b/examples/email_rag.py index 5d040b0..2cc8fa9 100644 --- a/examples/email_rag.py +++ b/examples/email_rag.py @@ -18,6 +18,7 @@ class EmailRAG(BaseRAGExample): def __init__(self): # Set default values BEFORE calling super().__init__ + self.max_items_default = -1 # Process all emails by default self.embedding_model_default = ( "sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model ) diff --git a/examples/wechat_rag.py b/examples/wechat_rag.py index f127f3f..6cb2259 100644 --- a/examples/wechat_rag.py +++ b/examples/wechat_rag.py @@ -19,7 +19,7 @@ class WeChatRAG(BaseRAGExample): def __init__(self): # Set default values BEFORE calling super().__init__ - self.max_items_default = 50 # Match original default + self.max_items_default = -1 # Process all chats by default self.embedding_model_default = ( "sentence-transformers/all-MiniLM-L6-v2" # Fast 384-dim model ) @@ -143,6 +143,7 @@ class WeChatRAG(BaseRAGExample): return [] print(f"\nTotal loaded {len(all_documents)} chat documents from {len(export_dirs)} exports") + print("now starting to split into text chunks ... 
take some time") # Convert to text chunks with contact information all_texts = [] diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py index a9f2ff4..39710df 100644 --- a/packages/leann-core/src/leann/api.py +++ b/packages/leann-core/src/leann/api.py @@ -463,8 +463,9 @@ class LeannSearcher: print( f"Leann metadata file not found at {self.meta_path_str}, and you may need to rm -rf {parent_dir}" ) + # Highlight the suggested fix in red (ANSI escape codes) in the FileNotFoundError message raise FileNotFoundError( - f"Leann metadata file not found at {self.meta_path_str}, you may need to rm -rf {parent_dir}" + f"Leann metadata file not found at {self.meta_path_str}, \033[91m you may need to rm -rf {parent_dir}\033[0m" ) with open(self.meta_path_str, encoding="utf-8") as f: self.meta_data = json.load(f)