upd readme wechat application

This commit is contained in:
yichuan520030910320
2025-07-13 22:00:49 -07:00
parent c611d0f30f
commit d038319d8b
2 changed files with 95 additions and 14 deletions

View File

@@ -144,7 +144,7 @@ This ensures the generated files are compatible with your system's protobuf libr
## Applications on your MacBook ## Applications on your MacBook
### light weight RAG on your apple email ### 📧 Lightweight RAG on your Apple Mail
LEANN can create a searchable index of your Apple Mail emails, allowing you to query your email history using natural language. LEANN can create a searchable index of your Apple Mail emails, allowing you to query your email history using natural language.
@@ -259,6 +259,60 @@ Once the index is built, you can ask questions like:
</details> </details>
### 💬 Lightweight RAG on your WeChat History
LEANN can create a searchable index of your WeChat chat history, allowing you to query your conversations using natural language.
#### Prerequisites
<details>
<summary><strong>🔧 Click to expand: Installation Requirements</strong></summary>
First, you need to install the WeChat exporter:
```bash
sudo packages/wechat-exporter/wechattweak-cli install
```
**Troubleshooting**: If you encounter installation issues, see [WeChatTweak-CLI issue #41](https://github.com/sunnyyoung/WeChatTweak-CLI/issues/41).
</details>
#### Quick Start
<details>
<summary><strong>📋 Click to expand: Command Examples</strong></summary>
```bash
# Use default settings (recommended for first run)
python examples/wechat_history_reader_leann.py
# Run with a custom export directory (on the first run, LEANN automatically exports all of your chat history)
python examples/wechat_history_reader_leann.py --export-dir "./my_wechat_exports"
# Run with custom index directory
python examples/wechat_history_reader_leann.py --index-dir "./my_wechat_index"
# Limit number of chat entries processed (useful for testing)
python examples/wechat_history_reader_leann.py --max-entries 1000
# Run a single query
python examples/wechat_history_reader_leann.py --query "Show me conversations about travel plans"
```
</details>
#### Example Queries
<details>
<summary><strong>💬 Click to expand: Example queries you can try</strong></summary>
Once the index is built, you can ask questions like:
- "我想买魔术师约翰逊的球衣,给我一些对应聊天记录?" (English: "I want to buy a Magic Johnson jersey; show me the related chat records.")
</details>
## 📊 Benchmarks ## 📊 Benchmarks

View File

@@ -1,6 +1,7 @@
import os import os
import asyncio import asyncio
import dotenv import dotenv
import argparse
from pathlib import Path from pathlib import Path
from typing import List, Any, Optional from typing import List, Any, Optional
from leann.api import LeannBuilder, LeannSearcher, LeannChat from leann.api import LeannBuilder, LeannSearcher, LeannChat
@@ -10,6 +11,9 @@ import time
dotenv.load_dotenv() dotenv.load_dotenv()
# Default WeChat export directory
DEFAULT_WECHAT_EXPORT_DIR = "./wechat_export_direct"
def create_leann_index_from_multiple_wechat_exports(export_dirs: List[Path], index_path: str = "wechat_history_index.leann", max_count: int = -1): def create_leann_index_from_multiple_wechat_exports(export_dirs: List[Path], index_path: str = "wechat_history_index.leann", max_count: int = -1):
""" """
Create LEANN index from multiple WeChat export data sources. Create LEANN index from multiple WeChat export data sources.
@@ -218,32 +222,55 @@ async def query_leann_index(index_path: str, query: str):
async def main(): async def main():
"""Main function with integrated WeChat export functionality.""" """Main function with integrated WeChat export functionality."""
# Parse command line arguments
parser = argparse.ArgumentParser(description='LEANN WeChat History Reader - Create and query WeChat chat history index')
parser.add_argument('--export-dir', type=str, default=DEFAULT_WECHAT_EXPORT_DIR,
help=f'Directory to store WeChat exports (default: {DEFAULT_WECHAT_EXPORT_DIR})')
parser.add_argument('--index-dir', type=str, default="./wechat_history_index_leann_test",
help='Directory to store the LEANN index (default: ./wechat_history_index_leann_test)')
parser.add_argument('--max-entries', type=int, default=5000,
help='Maximum number of chat entries to process (default: 5000)')
parser.add_argument('--query', type=str, default=None,
help='Single query to run (default: runs example queries)')
parser.add_argument('--force-export', action='store_true', default=False,
help='Force re-export of WeChat data even if exports exist')
args = parser.parse_args()
INDEX_DIR = Path(args.index_dir)
INDEX_PATH = str(INDEX_DIR / "wechat_history.leann")
print(f"Using WeChat export directory: {args.export_dir}")
print(f"Index directory: {INDEX_DIR}")
print(f"Max entries: {args.max_entries}")
# Initialize WeChat reader with export capabilities # Initialize WeChat reader with export capabilities
from history_data.wechat_history import WeChatHistoryReader from history_data.wechat_history import WeChatHistoryReader
reader = WeChatHistoryReader() reader = WeChatHistoryReader()
# Find existing exports or create new ones using the centralized method # Find existing exports or create new ones using the centralized method
export_dirs = reader.find_or_export_wechat_data("./wechat_export_direct") export_dirs = reader.find_or_export_wechat_data(args.export_dir, force_export=args.force_export)
if not export_dirs: if not export_dirs:
print("Failed to find or export WeChat data. Exiting.") print("Failed to find or export WeChat data. Exiting.")
return return
INDEX_DIR = Path("./wechat_history_index_leann_test")
INDEX_PATH = str(INDEX_DIR / "wechat_history.leann")
# Create or load the LEANN index from all sources # Create or load the LEANN index from all sources
index_path = create_leann_index_from_multiple_wechat_exports(export_dirs, INDEX_PATH, max_count=5000) index_path = create_leann_index_from_multiple_wechat_exports(export_dirs, INDEX_PATH, max_count=args.max_entries)
if index_path: if index_path:
# Example queries if args.query:
queries = [ # Run single query
"我想买魔术师约翰逊的球衣,给我一些对应聊天记录?", await query_leann_index(index_path, args.query)
] else:
# Example queries
for query in queries: queries = [
print("\n" + "="*60) "我想买魔术师约翰逊的球衣,给我一些对应聊天记录?",
await query_leann_index(index_path, query) ]
for query in queries:
print("\n" + "="*60)
await query_leann_index(index_path, query)
if __name__ == "__main__": if __name__ == "__main__":
asyncio.run(main()) asyncio.run(main())