diff --git a/README.md b/README.md
index b43b47a..66d4520 100755
--- a/README.md
+++ b/README.md
@@ -176,7 +176,7 @@ response = chat.ask("How much storage does LEANN save?", top_k=1)
## RAG on Everything!
-LEANN supports RAG on various data sources including documents (`.pdf`, `.txt`, `.md`), Apple Mail, Google Search History, WeChat, ChatGPT conversations, Claude conversations, and more.
+LEANN supports RAG on various data sources including documents (`.pdf`, `.txt`, `.md`), Apple Mail, Google Search History, WeChat, ChatGPT conversations, Claude conversations, iMessage conversations, and more.
@@ -625,6 +625,90 @@ Once your Claude conversations are indexed, you can search with queries like:
+### 💬 iMessage History: Your Personal Conversation Archive!
+
+Transform your iMessage conversations into a searchable knowledge base! Search through all your text messages, group chats, and conversations with friends, family, and colleagues.
+
+```bash
+python -m apps.imessage_rag --query "What did we discuss about the weekend plans?"
+```
+
+**Unlock your message history.** Never lose track of important conversations, shared links, or memorable moments from your iMessage history.
+
+
+📋 Click to expand: How to Access iMessage Data
+
+**iMessage data location:**
+
+iMessage conversations are stored in a SQLite database on your Mac at:
+```
+~/Library/Messages/chat.db
+```
+
+**Important setup requirements:**
+
+1. **Grant Full Disk Access** to your terminal or IDE:
+ - Open **System Settings** → **Privacy & Security** (on macOS 12 and earlier: **System Preferences** → **Security & Privacy** → **Privacy**)
+ - Select **Full Disk Access** from the list
+ - Click the **+** button and add your terminal app (Terminal, iTerm2) or IDE (VS Code, etc.)
+ - Restart your terminal/IDE after granting access
+
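+2. **Alternative: Use a backup database**
+ - If you have a Time Machine backup or a manual copy of the database, pass its location with `--db-path`
+ - The file must be named `chat.db`: only the directory part of `--db-path` is used, and `chat.db` is loaded from that directory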
+
+**Supported formats:**
+- Direct access to `~/Library/Messages/chat.db` (default)
+- Custom database path with `--db-path`
+- Works with backup copies of the database
+
+
+
+
+📋 Click to expand: iMessage-Specific Arguments
+
+#### Parameters
+```bash
+--db-path PATH # Path to chat.db file (default: ~/Library/Messages/chat.db)
+--concatenate-conversations # Group messages by conversation (default: True)
+--no-concatenate-conversations # Process each message individually
+--chunk-size N # Text chunk size (default: 1000)
+--chunk-overlap N # Overlap between chunks (default: 200)
+```
+
+#### Example Commands
+```bash
+# Basic usage (requires Full Disk Access)
+python -m apps.imessage_rag
+
+# Search with specific query
+python -m apps.imessage_rag --query "family dinner plans"
+
+# Use custom database path
+python -m apps.imessage_rag --db-path /path/to/backup/chat.db
+
+# Process individual messages instead of conversations
+python -m apps.imessage_rag --no-concatenate-conversations
+
+# Limit processing for testing
+python -m apps.imessage_rag --max-items 100 --query "weekend"
+```
+
+
+
+
+💡 Click to expand: Example queries you can try
+
+Once your iMessage conversations are indexed, you can search with queries like:
+- "What did we discuss about vacation plans?"
+- "Find messages about restaurant recommendations"
+- "Show me conversations with John about the project"
+- "Search for shared links about technology"
+- "Find group chat discussions about weekend events"
+- "What did mom say about the family gathering?"
+
+
+
### 🚀 Claude Code Integration: Transform Your Development Workflow!
diff --git a/apps/imessage_data/__init__.py b/apps/imessage_data/__init__.py
new file mode 100644
index 0000000..9e9e3fc
--- /dev/null
+++ b/apps/imessage_data/__init__.py
@@ -0,0 +1 @@
+"""iMessage data processing module."""
diff --git a/apps/imessage_data/imessage_reader.py b/apps/imessage_data/imessage_reader.py
new file mode 100644
index 0000000..4dfc0af
--- /dev/null
+++ b/apps/imessage_data/imessage_reader.py
@@ -0,0 +1,342 @@
+"""
+iMessage data reader.
+
+Reads and processes iMessage conversation data from the macOS Messages database.
+"""
+
+import sqlite3
+from datetime import datetime
+from pathlib import Path
+from typing import Any
+
+from llama_index.core import Document
+from llama_index.core.readers.base import BaseReader
+
+
+class IMessageReader(BaseReader):
+ """
+ iMessage data reader.
+
+ Reads iMessage conversation data from the macOS Messages database (chat.db).
+ Processes conversations into structured documents with metadata.
+ """
+
+ def __init__(self, concatenate_conversations: bool = True) -> None:
+ """
+ Initialize.
+
+ Args:
+ concatenate_conversations: Whether to concatenate messages within conversations for better context
+ """
+ self.concatenate_conversations = concatenate_conversations
+
+ def _get_default_chat_db_path(self) -> Path:
+ """
+ Get the default path to the iMessage chat database.
+
+ Returns:
+ Path to the chat.db file
+ """
+ home = Path.home()
+ return home / "Library" / "Messages" / "chat.db"
+
+ def _convert_cocoa_timestamp(self, cocoa_timestamp: int) -> str:
+ """
+ Convert Cocoa timestamp to readable format.
+
+ Args:
+ cocoa_timestamp: Timestamp in Cocoa format (nanoseconds since 2001-01-01)
+
+ Returns:
+ Formatted timestamp string
+ """
+ if cocoa_timestamp == 0:
+ return "Unknown"
+
+ try:
+ # Cocoa timestamp is nanoseconds since 2001-01-01 00:00:00 UTC
+ # Convert to seconds and add to Unix epoch
+ cocoa_epoch = datetime(2001, 1, 1)
+ unix_timestamp = cocoa_timestamp / 1_000_000_000 # Convert nanoseconds to seconds
+ message_time = cocoa_epoch.timestamp() + unix_timestamp
+ return datetime.fromtimestamp(message_time).strftime("%Y-%m-%d %H:%M:%S")
+ except (ValueError, OSError):
+ return "Unknown"
+
+ def _get_contact_name(self, handle_id: str) -> str:
+ """
+ Get a readable contact name from handle ID.
+
+ Args:
+ handle_id: The handle ID (phone number or email)
+
+ Returns:
+ Formatted contact name
+ """
+ if not handle_id:
+ return "Unknown"
+
+ # Clean up phone numbers and emails for display
+ if "@" in handle_id:
+ return handle_id # Email address
+ elif handle_id.startswith("+"):
+ return handle_id # International phone number
+ else:
+ # Try to format as phone number
+ digits = "".join(filter(str.isdigit, handle_id))
+ if len(digits) == 10:
+ return f"({digits[:3]}) {digits[3:6]}-{digits[6:]}"
+ elif len(digits) == 11 and digits[0] == "1":
+ return f"+1 ({digits[1:4]}) {digits[4:7]}-{digits[7:]}"
+ else:
+ return handle_id
+
+ def _read_messages_from_db(self, db_path: Path) -> list[dict]:
+ """
+ Read messages from the iMessage database.
+
+ Args:
+ db_path: Path to the chat.db file
+
+ Returns:
+ List of message dictionaries
+ """
+ if not db_path.exists():
+ print(f"iMessage database not found at: {db_path}")
+ return []
+
+ try:
+ # Connect to the database
+ conn = sqlite3.connect(str(db_path))
+ cursor = conn.cursor()
+
+ # Query to get messages with chat and handle information
+ query = """
+ SELECT
+ m.ROWID as message_id,
+ m.text,
+ m.date,
+ m.is_from_me,
+ m.service,
+ c.chat_identifier,
+ c.display_name as chat_display_name,
+ h.id as handle_id,
+ c.ROWID as chat_id
+ FROM message m
+ LEFT JOIN chat_message_join cmj ON m.ROWID = cmj.message_id
+ LEFT JOIN chat c ON cmj.chat_id = c.ROWID
+ LEFT JOIN handle h ON m.handle_id = h.ROWID
+ WHERE m.text IS NOT NULL AND m.text != ''
+ ORDER BY c.ROWID, m.date
+ """
+
+ cursor.execute(query)
+ rows = cursor.fetchall()
+
+ messages = []
+ for row in rows:
+ (
+ message_id,
+ text,
+ date,
+ is_from_me,
+ service,
+ chat_identifier,
+ chat_display_name,
+ handle_id,
+ chat_id,
+ ) = row
+
+ message = {
+ "message_id": message_id,
+ "text": text,
+ "timestamp": self._convert_cocoa_timestamp(date),
+ "is_from_me": bool(is_from_me),
+ "service": service or "iMessage",
+ "chat_identifier": chat_identifier or "Unknown",
+ "chat_display_name": chat_display_name or "Unknown Chat",
+ "handle_id": handle_id or "Unknown",
+ "contact_name": self._get_contact_name(handle_id or ""),
+ "chat_id": chat_id,
+ }
+ messages.append(message)
+
+ conn.close()
+ print(f"Found {len(messages)} messages in database")
+ return messages
+
+ except sqlite3.Error as e:
+ print(f"Error reading iMessage database: {e}")
+ return []
+ except Exception as e:
+ print(f"Unexpected error reading iMessage database: {e}")
+ return []
+
+ def _group_messages_by_chat(self, messages: list[dict]) -> dict[int, list[dict]]:
+ """
+ Group messages by chat ID.
+
+ Args:
+ messages: List of message dictionaries
+
+ Returns:
+ Dictionary mapping chat_id to list of messages
+ """
+ chats = {}
+ for message in messages:
+ chat_id = message["chat_id"]
+ if chat_id not in chats:
+ chats[chat_id] = []
+ chats[chat_id].append(message)
+
+ return chats
+
+ def _create_concatenated_content(self, chat_id: int, messages: list[dict]) -> str:
+ """
+ Create concatenated content from chat messages.
+
+ Args:
+ chat_id: The chat ID
+ messages: List of messages in the chat
+
+ Returns:
+ Concatenated text content
+ """
+ if not messages:
+ return ""
+
+ # Get chat info from first message
+ first_msg = messages[0]
+ chat_name = first_msg["chat_display_name"]
+ chat_identifier = first_msg["chat_identifier"]
+
+ # Build message content
+ message_parts = []
+ for message in messages:
+ timestamp = message["timestamp"]
+ is_from_me = message["is_from_me"]
+ text = message["text"]
+ contact_name = message["contact_name"]
+
+ if is_from_me:
+ prefix = "[You]"
+ else:
+ prefix = f"[{contact_name}]"
+
+ if timestamp != "Unknown":
+ prefix += f" ({timestamp})"
+
+ message_parts.append(f"{prefix}: {text}")
+
+ concatenated_text = "\n\n".join(message_parts)
+
+ doc_content = f"""Chat: {chat_name}
+Identifier: {chat_identifier}
+Messages ({len(messages)} messages):
+
+{concatenated_text}
+"""
+ return doc_content
+
+ def _create_individual_content(self, message: dict) -> str:
+ """
+ Create content for individual message.
+
+ Args:
+ message: Message dictionary
+
+ Returns:
+ Formatted message content
+ """
+ timestamp = message["timestamp"]
+ is_from_me = message["is_from_me"]
+ text = message["text"]
+ contact_name = message["contact_name"]
+ chat_name = message["chat_display_name"]
+
+ sender = "You" if is_from_me else contact_name
+
+ return f"""Message from {sender} in chat "{chat_name}"
+Time: {timestamp}
+Content: {text}
+"""
+
+ def load_data(self, input_dir: str | None = None, **load_kwargs: Any) -> list[Document]:
+ """
+ Load iMessage data and return as documents.
+
+ Args:
+ input_dir: Optional path to directory containing chat.db file.
+ If not provided, uses default macOS location.
+ **load_kwargs: Additional arguments (unused)
+
+ Returns:
+ List of Document objects containing iMessage data
+ """
+ docs = []
+
+ # Determine database path
+ if input_dir:
+ db_path = Path(input_dir) / "chat.db"
+ else:
+ db_path = self._get_default_chat_db_path()
+
+ print(f"Reading iMessage database from: {db_path}")
+
+ # Read messages from database
+ messages = self._read_messages_from_db(db_path)
+ if not messages:
+ return docs
+
+ if self.concatenate_conversations:
+ # Group messages by chat and create concatenated documents
+ chats = self._group_messages_by_chat(messages)
+
+ for chat_id, chat_messages in chats.items():
+ if not chat_messages:
+ continue
+
+ content = self._create_concatenated_content(chat_id, chat_messages)
+
+ # Create metadata
+ first_msg = chat_messages[0]
+ last_msg = chat_messages[-1]
+
+ metadata = {
+ "source": "iMessage",
+ "chat_id": chat_id,
+ "chat_name": first_msg["chat_display_name"],
+ "chat_identifier": first_msg["chat_identifier"],
+ "message_count": len(chat_messages),
+ "first_message_date": first_msg["timestamp"],
+ "last_message_date": last_msg["timestamp"],
+ "participants": list(
+ {msg["contact_name"] for msg in chat_messages if not msg["is_from_me"]}
+ ),
+ }
+
+ doc = Document(text=content, metadata=metadata)
+ docs.append(doc)
+
+ else:
+ # Create individual documents for each message
+ for message in messages:
+ content = self._create_individual_content(message)
+
+ metadata = {
+ "source": "iMessage",
+ "message_id": message["message_id"],
+ "chat_id": message["chat_id"],
+ "chat_name": message["chat_display_name"],
+ "chat_identifier": message["chat_identifier"],
+ "timestamp": message["timestamp"],
+ "is_from_me": message["is_from_me"],
+ "contact_name": message["contact_name"],
+ "service": message["service"],
+ }
+
+ doc = Document(text=content, metadata=metadata)
+ docs.append(doc)
+
+ print(f"Created {len(docs)} documents from iMessage data")
+ return docs
diff --git a/apps/imessage_rag.py b/apps/imessage_rag.py
new file mode 100644
index 0000000..50032ec
--- /dev/null
+++ b/apps/imessage_rag.py
@@ -0,0 +1,125 @@
+"""
+iMessage RAG Example.
+
+This example demonstrates how to build a RAG system on your iMessage conversation history.
+"""
+
+import asyncio
+from pathlib import Path
+
+from leann.chunking_utils import create_text_chunks
+
+from apps.base_rag_example import BaseRAGExample
+from apps.imessage_data.imessage_reader import IMessageReader
+
+
+class IMessageRAG(BaseRAGExample):
+ """RAG example for iMessage conversation history."""
+
+ def __init__(self):
+ super().__init__(
+ name="iMessage",
+ description="RAG on your iMessage conversation history",
+ default_index_name="imessage_index",
+ )
+
+ def _add_specific_arguments(self, parser):
+ """Add iMessage-specific arguments."""
+ imessage_group = parser.add_argument_group("iMessage Parameters")
+ imessage_group.add_argument(
+ "--db-path",
+ type=str,
+ default=None,
+ help="Path to iMessage chat.db file (default: ~/Library/Messages/chat.db)",
+ )
+ imessage_group.add_argument(
+ "--concatenate-conversations",
+ action="store_true",
+ default=True,
+ help="Concatenate messages within conversations for better context (default: True)",
+ )
+ imessage_group.add_argument(
+ "--no-concatenate-conversations",
+ action="store_true",
+ help="Process each message individually instead of concatenating by conversation",
+ )
+ imessage_group.add_argument(
+ "--chunk-size",
+ type=int,
+ default=1000,
+ help="Maximum characters per text chunk (default: 1000)",
+ )
+ imessage_group.add_argument(
+ "--chunk-overlap",
+ type=int,
+ default=200,
+ help="Overlap between text chunks (default: 200)",
+ )
+
+ async def load_data(self, args) -> list[str]:
+ """Load iMessage history and convert to text chunks."""
+ print("Loading iMessage conversation history...")
+
+ # Determine concatenation setting
+ concatenate = args.concatenate_conversations and not args.no_concatenate_conversations
+
+ # Initialize iMessage reader
+ reader = IMessageReader(concatenate_conversations=concatenate)
+
+ # Load documents
+ try:
+ if args.db_path:
+ # Use custom database path
+ db_dir = str(Path(args.db_path).parent)
+ documents = reader.load_data(input_dir=db_dir)
+ else:
+ # Use default macOS location
+ documents = reader.load_data()
+
+ except Exception as e:
+ print(f"Error loading iMessage data: {e}")
+ print("\nTroubleshooting tips:")
+ print("1. Make sure you have granted Full Disk Access to your terminal/IDE")
+ print("2. Check that the iMessage database exists at ~/Library/Messages/chat.db")
+ print("3. Try specifying a custom path with --db-path if you have a backup")
+ return []
+
+ if not documents:
+ print("No iMessage conversations found!")
+ return []
+
+ print(f"Loaded {len(documents)} iMessage documents")
+
+ # Show some statistics
+ total_messages = sum(doc.metadata.get("message_count", 1) for doc in documents)
+ print(f"Total messages: {total_messages}")
+
+ if concatenate:
+ # Show chat statistics
+ chat_names = [doc.metadata.get("chat_name", "Unknown") for doc in documents]
+ unique_chats = len(set(chat_names))
+ print(f"Unique conversations: {unique_chats}")
+
+ # Convert to text chunks
+ all_texts = create_text_chunks(
+ documents,
+ chunk_size=args.chunk_size,
+ chunk_overlap=args.chunk_overlap,
+ )
+
+ # Apply max_items limit if specified
+ if args.max_items > 0:
+ all_texts = all_texts[: args.max_items]
+ print(f"Limited to {len(all_texts)} text chunks (max_items={args.max_items})")
+
+ return all_texts
+
+
+async def main():
+ """Main entry point."""
+ app = IMessageRAG()
+ await app.run()
+
+
+# Run the async entry point only when this module is executed as a script.
+if __name__ == "__main__":
+ asyncio.run(main())