diff --git a/README.md b/README.md index c79d703..3c11c0d 100755 --- a/README.md +++ b/README.md @@ -198,6 +198,68 @@ Once the index is built, you can ask questions like: +### 🌐 Lightweight RAG on your Google Chrome History + +LEANN can create a searchable index of your Chrome browser history, allowing you to query your browsing history using natural language. + +#### Quick Start + +
+📋 Click to expand: Command Examples + +```bash +# Recommended: use the default Chrome profile location (auto-finds all profiles); the defaults are usually enough +python examples/google_history_reader_leann.py + + +# Run with custom index directory +python examples/google_history_reader_leann.py --index-dir "./my_chrome_index" + +# Limit number of history entries processed (useful for testing) +python examples/google_history_reader_leann.py --max-entries 500 + +# Run a single query +python examples/google_history_reader_leann.py --query "What websites did I visit about machine learning?" + +# Use only a specific profile (disable auto-find) +python examples/google_history_reader_leann.py --chrome-profile "$HOME/Library/Application Support/Google/Chrome/Default" --no-auto-find-profiles +``` + +
+ +#### Finding Your Chrome Profile + +
+🔍 Click to expand: How to find your Chrome profile + +The default Chrome profile path is configured for a typical macOS setup. If you need to find your specific Chrome profile: + +1. Open Terminal +2. Run: `ls ~/Library/Application\ Support/Google/Chrome/` +3. Look for folders like "Default", "Profile 1", "Profile 2", etc. +4. Use the full path as your `--chrome-profile` argument + +**Common Chrome profile locations:** +- macOS: `~/Library/Application Support/Google/Chrome/Default` +- Linux: `~/.config/google-chrome/Default` + +
+ +#### Example Queries + +
+💬 Click to expand: Example queries you can try + +Once the index is built, you can ask questions like: +- "What websites did I visit about machine learning?" +- "Find my search history about programming" +- "What YouTube videos did I watch recently?" +- "Show me websites I visited about travel planning" + +
+
+
+
 ## 📊 Benchmarks
diff --git a/examples/google_history_reader_leann.py b/examples/google_history_reader_leann.py
index 4e92f7d..7342287 100644
--- a/examples/google_history_reader_leann.py
+++ b/examples/google_history_reader_leann.py
@@ -1,5 +1,6 @@
 import os
 import asyncio
+import argparse
 try:
     import dotenv
     dotenv.load_dotenv()
@@ -13,6 +14,9 @@ from llama_index.core.node_parser import SentenceSplitter
 
 # dotenv.load_dotenv() # handled above if python-dotenv is available
 
+# Default Chrome profile path
+DEFAULT_CHROME_PROFILE = os.path.expanduser("~/Library/Application Support/Google/Chrome/Default")
+
 def create_leann_index_from_multiple_chrome_profiles(profile_dirs: List[Path], index_path: str = "chrome_history_index.leann", max_count: int = -1):
     """
     Create LEANN index from multiple Chrome profile data sources.
@@ -217,32 +221,68 @@ async def query_leann_index(index_path: str, query: str):
     print(f"Leann: {chat_response}")
 
 async def main():
-    # Default Chrome profile path
-    default_chrome_profile = os.path.expanduser("~/Library/Application Support/Google/Chrome/Default")
+    """Parse CLI options, create or load the Chrome-history LEANN index, and run queries."""
+    # Parse command line arguments
+    parser = argparse.ArgumentParser(description='LEANN Chrome History Reader - Create and query browser history index')
+    parser.add_argument('--chrome-profile', type=str, default=DEFAULT_CHROME_PROFILE,
+                        help=f'Path to Chrome profile directory (default: {DEFAULT_CHROME_PROFILE}), usually you dont need to change this')
+    parser.add_argument('--index-dir', type=str, default="./chrome_history_index_leann_test",
+                        help='Directory to store the LEANN index (default: ./chrome_history_index_leann_test)')
+    parser.add_argument('--max-entries', type=int, default=1000,
+                        help='Maximum number of history entries to process (default: 1000)')
+    parser.add_argument('--query', type=str, default=None,
+                        help='Single query to run (default: runs example queries)')
+    parser.add_argument('--auto-find-profiles', action='store_true', default=True,
+                        help='Automatically find all Chrome profiles (default: True)')
+    # Paired off-switch: a store_true flag that defaults to True could otherwise never be disabled.
+    parser.add_argument('--no-auto-find-profiles', dest='auto_find_profiles', action='store_false',
+                        help='Only use the profile given by --chrome-profile')
 
-    INDEX_DIR = Path("./chrome_history_index_leann_test")
+    args = parser.parse_args()
+
+    INDEX_DIR = Path(args.index_dir)
     INDEX_PATH = str(INDEX_DIR / "chrome_history.leann")
 
-    # Find all Chrome profile directories
-    from history_data.history import ChromeHistoryReader
-    profile_dirs = ChromeHistoryReader.find_chrome_profiles()
+    print(f"Using Chrome profile: {args.chrome_profile}")
+    print(f"Index directory: {INDEX_DIR}")
+    print(f"Max entries: {args.max_entries}")
 
-    if not profile_dirs:
-        print("No Chrome profiles found. Exiting.")
-        return
+    # Find Chrome profile directories
+    from history_data.history import ChromeHistoryReader
+
+    if args.auto_find_profiles:
+        profile_dirs = ChromeHistoryReader.find_chrome_profiles()
+        if not profile_dirs:
+            print("No Chrome profiles found automatically. Exiting.")
+            return
+    else:
+        # Use single specified profile
+        # Expand "~" here: a quoted path on the command line is not tilde-expanded by the shell.
+        profile_path = Path(args.chrome_profile).expanduser()
+        if not profile_path.exists():
+            print(f"Chrome profile not found: {profile_path}")
+            return
+        profile_dirs = [profile_path]
 
     # Create or load the LEANN index from all sources
-    index_path = create_leann_index_from_multiple_chrome_profiles(profile_dirs, INDEX_PATH,1000)
+    index_path = create_leann_index_from_multiple_chrome_profiles(profile_dirs, INDEX_PATH, args.max_entries)
 
     if index_path:
-        # Example queries
-        queries = [
-            "What websites did I visit about machine learning?",
-        ]
-
-        for query in queries:
-            print("\n" + "="*60)
-            await query_leann_index(index_path, query)
+        if args.query:
+            # Run single query
+            await query_leann_index(index_path, args.query)
+        else:
+            # Example queries
+            queries = [
+                "What websites did I visit about machine learning?",
+                "Show me my recent shopping history",
+                "What news sites did I visit this week?",
+                "Find my search history about programming"
+            ]
+
+            for query in queries:
+                print("\n" + "="*60)
+                await query_leann_index(index_path, query)
 
 if __name__ == "__main__":
     asyncio.run(main())
\ No newline at end of file