diff --git a/README.md b/README.md index 13bae5d..def003d 100755 --- a/README.md +++ b/README.md @@ -16,9 +16,7 @@ LEANN is an innovative vector database that democratizes personal AI. Transform LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276) -**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy. - -> **🚀 Claude Code Integration!** LEANN now provides native MCP integration for Claude Code users. Index your codebase and get intelligent code assistance directly in Claude Code. [Setup Guide →](packages/leann-mcp/README.md) +**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[codebase](#-claude-code-integration-transform-your-development-workflow)**, **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy. @@ -213,6 +211,30 @@ All RAG examples share these common parameters. **Interactive mode** is availabl +### 🚀 Claude Code Integration: Transform Your Development Workflow! 
+ +**The future of code assistance is here.** Transform your development workflow with LEANN's native MCP integration for Claude Code. Index your entire codebase and get intelligent code assistance directly in Claude Code. + +
+ +**Key features:** +- 🔍 **Semantic code search** across your entire project +- 📚 **Context-aware assistance** for debugging and development +- 🚀 **Zero-config setup** with automatic language detection +- 🔒 **Complete privacy** - your code never leaves your machine + +```bash +# Install LEANN globally for MCP integration +uv tool install leann-core + +# Setup is automatic - just start using Claude Code! +``` + +**Ready to supercharge your coding?** [Complete Setup Guide →](packages/leann-mcp/README.md) + ### 📄 Personal Data Manager: Process Any Documents (`.pdf`, `.txt`, `.md`)! Ask questions directly about your personal PDFs, documents, and any directory containing your files! @@ -417,7 +439,6 @@ Once the index is built, you can ask questions like: - ## 🖥️ Command Line Interface LEANN includes a powerful CLI for document processing and search. Perfect for quick document indexing and interactive chat. diff --git a/packages/leann-backend-diskann/pyproject.toml b/packages/leann-backend-diskann/pyproject.toml index 48b2134..b0a168d 100644 --- a/packages/leann-backend-diskann/pyproject.toml +++ b/packages/leann-backend-diskann/pyproject.toml @@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build" [project] name = "leann-backend-diskann" -version = "0.2.1" -dependencies = ["leann-core==0.2.1", "numpy", "protobuf>=3.19.0"] +version = "0.2.2" +dependencies = ["leann-core==0.2.2", "numpy", "protobuf>=3.19.0"] [tool.scikit-build] # Key: simplified CMake path diff --git a/packages/leann-backend-hnsw/pyproject.toml b/packages/leann-backend-hnsw/pyproject.toml index f2b4b5c..3518cd2 100644 --- a/packages/leann-backend-hnsw/pyproject.toml +++ b/packages/leann-backend-hnsw/pyproject.toml @@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build" [project] name = "leann-backend-hnsw" -version = "0.2.1" +version = "0.2.2" description = "Custom-built HNSW (Faiss) backend for the Leann toolkit." 
dependencies = [ - "leann-core==0.2.1", + "leann-core==0.2.2", "numpy", "pyzmq>=23.0.0", "msgpack>=1.0.0", diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml index e7d178d..c8f59b0 100644 --- a/packages/leann-core/pyproject.toml +++ b/packages/leann-core/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "leann-core" -version = "0.2.1" +version = "0.2.2" description = "Core API and plugin system for LEANN" readme = "README.md" requires-python = ">=3.9" diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py index 489c5d1..787cadd 100644 --- a/packages/leann-core/src/leann/cli.py +++ b/packages/leann-core/src/leann/cli.py @@ -74,10 +74,11 @@ class LeannCLI: formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: - leann build my-docs --docs ./documents # Build index named my-docs - leann search my-docs "query" # Search in my-docs index - leann ask my-docs "question" # Ask my-docs index - leann list # List all stored indexes + leann build my-docs --docs ./documents # Build index named my-docs + leann build my-ppts --docs ./ --file-types .pptx,.pdf # Index only PowerPoint and PDF files + leann search my-docs "query" # Search in my-docs index + leann ask my-docs "question" # Ask my-docs index + leann list # List all stored indexes """, ) @@ -99,6 +100,11 @@ Examples: build_parser.add_argument("--num-threads", type=int, default=1) build_parser.add_argument("--compact", action="store_true", default=True) build_parser.add_argument("--recompute", action="store_true", default=True) + build_parser.add_argument( + "--file-types", + type=str, + help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). 
If not specified, uses default supported types.", + ) # Search command search_parser = subparsers.add_parser("search", help="Search documents") @@ -108,7 +114,12 @@ Examples: search_parser.add_argument("--complexity", type=int, default=64) search_parser.add_argument("--beam-width", type=int, default=1) search_parser.add_argument("--prune-ratio", type=float, default=0.0) - search_parser.add_argument("--recompute-embeddings", action="store_true") + search_parser.add_argument( + "--recompute-embeddings", + action="store_true", + default=True, + help="Recompute embeddings (default: True)", + ) search_parser.add_argument( "--pruning-strategy", choices=["global", "local", "proportional"], @@ -131,7 +142,12 @@ Examples: ask_parser.add_argument("--complexity", type=int, default=32) ask_parser.add_argument("--beam-width", type=int, default=1) ask_parser.add_argument("--prune-ratio", type=float, default=0.0) - ask_parser.add_argument("--recompute-embeddings", action="store_true") + ask_parser.add_argument( + "--recompute-embeddings", + action="store_true", + default=True, + help="Recompute embeddings (default: True)", + ) ask_parser.add_argument( "--pruning-strategy", choices=["global", "local", "proportional"], @@ -254,8 +270,10 @@ Examples: print(f' leann search {example_name} "your query"') print(f" leann ask {example_name} --interactive") - def load_documents(self, docs_dir: str): + def load_documents(self, docs_dir: str, custom_file_types: str | None = None): print(f"Loading documents from {docs_dir}...") + if custom_file_types: + print(f"Using custom file types: {custom_file_types}") # Try to use better PDF parsers first documents = [] @@ -287,66 +305,81 @@ Examples: documents.extend(default_docs) # Load other file types with default reader - code_extensions = [ - # Original document types - ".txt", - ".md", - ".docx", - # Code files for Claude Code integration - ".py", - ".js", - ".ts", - ".jsx", - ".tsx", - ".java", - ".cpp", - ".c", - ".h", - ".hpp", - ".cs", - 
".go", - ".rs", - ".rb", - ".php", - ".swift", - ".kt", - ".scala", - ".r", - ".sql", - ".sh", - ".bash", - ".zsh", - ".fish", - ".ps1", - ".bat", - # Config and markup files - ".json", - ".yaml", - ".yml", - ".xml", - ".toml", - ".ini", - ".cfg", - ".conf", - ".html", - ".css", - ".scss", - ".less", - ".vue", - ".svelte", - # Data science - ".ipynb", - ".R", - ".py", - ".jl", - ] - other_docs = SimpleDirectoryReader( - docs_dir, - recursive=True, - encoding="utf-8", - required_exts=code_extensions, - ).load_data(show_progress=True) - documents.extend(other_docs) + if custom_file_types: + # Parse custom file types from comma-separated string + code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()] + # Ensure extensions start with a dot + code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions] + else: + # Use default supported file types + code_extensions = [ + # Original document types + ".txt", + ".md", + ".docx", + ".pptx", + # Code files for Claude Code integration + ".py", + ".js", + ".ts", + ".jsx", + ".tsx", + ".java", + ".cpp", + ".c", + ".h", + ".hpp", + ".cs", + ".go", + ".rs", + ".rb", + ".php", + ".swift", + ".kt", + ".scala", + ".r", + ".sql", + ".sh", + ".bash", + ".zsh", + ".fish", + ".ps1", + ".bat", + # Config and markup files + ".json", + ".yaml", + ".yml", + ".xml", + ".toml", + ".ini", + ".cfg", + ".conf", + ".html", + ".css", + ".scss", + ".less", + ".vue", + ".svelte", + # Data science + ".ipynb", + ".R", + ".py", + ".jl", + ] + # Try to load other file types, but don't fail if none are found + try: + other_docs = SimpleDirectoryReader( + docs_dir, + recursive=True, + encoding="utf-8", + required_exts=code_extensions, + ).load_data(show_progress=True) + documents.extend(other_docs) + except ValueError as e: + if "No files found" in str(e): + print("No additional files found for other supported types.") + else: + raise e all_texts = [] @@ -424,7 +457,7 @@ Examples: print(f"Index 
'{index_name}' already exists. Use --force to rebuild.") return - all_texts = self.load_documents(docs_dir) + all_texts = self.load_documents(docs_dir, args.file_types) if not all_texts: print("No documents found") return diff --git a/packages/leann/pyproject.toml b/packages/leann/pyproject.toml index 1f3cb50..eeb4050 100644 --- a/packages/leann/pyproject.toml +++ b/packages/leann/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "leann" -version = "0.2.1" +version = "0.2.2" description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!" readme = "README.md" requires-python = ">=3.9"