Merge remote-tracking branch 'origin/main' into feature/claude-code-research

feat: Add Claude Code integration with MCP server
feat: Claude Code integration ready - LEANN CLI works out of the box
2025-08-05 23:02:00 -07:00 · 2025-08-05 14:03:36 -07:00 · 2025-08-05 12:27:58 -07:00 · 2025-08-04 20:10:14 -07:00 · 2025-08-04 20:01:23 -07:00 · 2025-08-04 19:29:17 -07:00
6 changed files with 75 additions and 108 deletions
--- a/packages/leann-backend-diskann/pyproject.toml
+++ b/packages/leann-backend-diskann/pyproject.toml
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
 [project]
 name = "leann-backend-diskann"
-version = "0.2.2"
+version = "0.2.1"
-dependencies = ["leann-core==0.2.2", "numpy", "protobuf>=3.19.0"]
+dependencies = ["leann-core==0.2.1", "numpy", "protobuf>=3.19.0"]
 [tool.scikit-build]
 # Key: simplified CMake path
--- a/packages/leann-backend-diskann/third_party/DiskANN
+++ b/packages/leann-backend-diskann/third_party/DiskANN
--- a/packages/leann-backend-hnsw/pyproject.toml
+++ b/packages/leann-backend-hnsw/pyproject.toml
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
 [project]
 name = "leann-backend-hnsw"
-version = "0.2.2"
+version = "0.2.1"
 description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
 dependencies = [
-    "leann-core==0.2.2",
+    "leann-core==0.2.1",
    "numpy",
    "pyzmq>=23.0.0",
    "msgpack>=1.0.0",
--- a/packages/leann-core/pyproject.toml
+++ b/packages/leann-core/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "leann-core"
-version = "0.2.2"
+version = "0.2.1"
 description = "Core API and plugin system for LEANN"
 readme = "README.md"
 requires-python = ">=3.9"
--- a/packages/leann-core/src/leann/cli.py
+++ b/packages/leann-core/src/leann/cli.py
@@ -74,11 +74,10 @@ class LeannCLI:
            formatter_class=argparse.RawDescriptionHelpFormatter,
            epilog="""
 Examples:
-  leann build my-docs --docs ./documents                    # Build index named my-docs
+  leann build my-docs --docs ./documents    # Build index named my-docs
-  leann build my-ppts --docs ./ --file-types .pptx,.pdf    # Index only PowerPoint and PDF files
+  leann search my-docs "query"             # Search in my-docs index
-  leann search my-docs "query"                             # Search in my-docs index
+  leann ask my-docs "question"             # Ask my-docs index
-  leann ask my-docs "question"                             # Ask my-docs index
+  leann list                              # List all stored indexes
  leann list                                              # List all stored indexes
            """,
        )
@@ -100,11 +99,6 @@ Examples:
        build_parser.add_argument("--num-threads", type=int, default=1)
        build_parser.add_argument("--compact", action="store_true", default=True)
        build_parser.add_argument("--recompute", action="store_true", default=True)
        build_parser.add_argument(
            "--file-types",
            type=str,
            help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
        )
        # Search command
        search_parser = subparsers.add_parser("search", help="Search documents")
@@ -114,12 +108,7 @@ Examples:
        search_parser.add_argument("--complexity", type=int, default=64)
        search_parser.add_argument("--beam-width", type=int, default=1)
        search_parser.add_argument("--prune-ratio", type=float, default=0.0)
-        search_parser.add_argument(
+        search_parser.add_argument("--recompute-embeddings", action="store_true")
            "--recompute-embeddings",
            action="store_true",
            default=True,
            help="Recompute embeddings (default: True)",
        )
        search_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -142,12 +131,7 @@ Examples:
        ask_parser.add_argument("--complexity", type=int, default=32)
        ask_parser.add_argument("--beam-width", type=int, default=1)
        ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
-        ask_parser.add_argument(
+        ask_parser.add_argument("--recompute-embeddings", action="store_true")
            "--recompute-embeddings",
            action="store_true",
            default=True,
            help="Recompute embeddings (default: True)",
        )
        ask_parser.add_argument(
            "--pruning-strategy",
            choices=["global", "local", "proportional"],
@@ -270,10 +254,8 @@ Examples:
                    print(f'  leann search {example_name} "your query"')
                    print(f"  leann ask {example_name} --interactive")
-    def load_documents(self, docs_dir: str, custom_file_types: str | None = None):
+    def load_documents(self, docs_dir: str):
        print(f"Loading documents from {docs_dir}...")
        if custom_file_types:
            print(f"Using custom file types: {custom_file_types}")
        # Try to use better PDF parsers first
        documents = []
@@ -305,81 +287,66 @@ Examples:
                documents.extend(default_docs)
        # Load other file types with default reader
-        if custom_file_types:
+        code_extensions = [
-            # Parse custom file types from comma-separated string
+            # Original document types
-            code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
+            ".txt",
-            # Ensure extensions start with a dot
+            ".md",
-            code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
+            ".docx",
-        else:
+            # Code files for Claude Code integration
-            # Use default supported file types
+            ".py",
-            code_extensions = [
+            ".js",
-                # Original document types
+            ".ts",
-                ".txt",
+            ".jsx",
-                ".md",
+            ".tsx",
-                ".docx",
+            ".java",
-                ".pptx",
+            ".cpp",
-                # Code files for Claude Code integration
+            ".c",
-                ".py",
+            ".h",
-                ".js",
+            ".hpp",
-                ".ts",
+            ".cs",
-                ".jsx",
+            ".go",
-                ".tsx",
+            ".rs",
-                ".java",
+            ".rb",
-                ".cpp",
+            ".php",
-                ".c",
+            ".swift",
-                ".h",
+            ".kt",
-                ".hpp",
+            ".scala",
-                ".cs",
+            ".r",
-                ".go",
+            ".sql",
-                ".rs",
+            ".sh",
-                ".rb",
+            ".bash",
-                ".php",
+            ".zsh",
-                ".swift",
+            ".fish",
-                ".kt",
+            ".ps1",
-                ".scala",
+            ".bat",
-                ".r",
+            # Config and markup files
-                ".sql",
+            ".json",
-                ".sh",
+            ".yaml",
-                ".bash",
+            ".yml",
-                ".zsh",
+            ".xml",
-                ".fish",
+            ".toml",
-                ".ps1",
+            ".ini",
-                ".bat",
+            ".cfg",
-                # Config and markup files
+            ".conf",
-                ".json",
+            ".html",
-                ".yaml",
+            ".css",
-                ".yml",
+            ".scss",
-                ".xml",
+            ".less",
-                ".toml",
+            ".vue",
-                ".ini",
+            ".svelte",
-                ".cfg",
+            # Data science
-                ".conf",
+            ".ipynb",
-                ".html",
+            ".R",
-                ".css",
+            ".py",
-                ".scss",
+            ".jl",
-                ".less",
+        ]
-                ".vue",
+        other_docs = SimpleDirectoryReader(
-                ".svelte",
+            docs_dir,
-                # Data science
+            recursive=True,
-                ".ipynb",
+            encoding="utf-8",
-                ".R",
+            required_exts=code_extensions,
-                ".py",
+        ).load_data(show_progress=True)
-                ".jl",
+        documents.extend(other_docs)
            ]
        # Try to load other file types, but don't fail if none are found
        try:
            other_docs = SimpleDirectoryReader(
                docs_dir,
                recursive=True,
                encoding="utf-8",
                required_exts=code_extensions,
            ).load_data(show_progress=True)
            documents.extend(other_docs)
        except ValueError as e:
            if "No files found" in str(e):
                print("No additional files found for other supported types.")
            else:
                raise e
        all_texts = []
@@ -457,7 +424,7 @@ Examples:
            print(f"Index '{index_name}' already exists. Use --force to rebuild.")
            return
-        all_texts = self.load_documents(docs_dir, args.file_types)
+        all_texts = self.load_documents(docs_dir)
        if not all_texts:
            print("No documents found")
            return
--- a/packages/leann/pyproject.toml
+++ b/packages/leann/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "leann"
-version = "0.2.2"
+version = "0.2.1"
 description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
 readme = "README.md"
 requires-python = ">=3.9"
Author	SHA1	Message	Date
Andy Lee	b55eeeae5f	Merge remote-tracking branch 'origin/main' into feature/claude-code-research	2025-08-05 23:02:00 -07:00
Andy Lee	e890b2311f	feat: Add Claude Code integration with MCP server	2025-08-05 14:03:36 -07:00
Andy Lee	f3d99fd118	feat: Claude Code integration ready - LEANN CLI works out of the box ✅ Verified LEANN CLI works perfectly with Claude Code ✅ Added integration guide with working examples ✅ Documented simple workflow for immediate use Key findings: - No code changes needed - Just need --recompute-embeddings flag - Search, ask, and build all work - Ready for Claude Code agents and workflows	2025-08-05 12:27:58 -07:00
Andy Lee	8eee90bf80	docs: add a link	2025-08-04 20:10:14 -07:00
Andy Lee	649d4ad03e	docs: Address all configuration guide feedback - Fix grammar: 'If time is not a constraint' instead of 'time expense is not large' - Highlight Qwen3-Embedding-0.6B performance (nearly OpenAI API level) - Add OpenAI quick start section with configuration example - Fold Cloud vs Local trade-offs into collapsible section - Update HNSW as 'default and recommended for extreme low storage' - Add DiskANN beta warning and explain PQ+rerank architecture - Expand Ollama models: add qwen3:0.6b, 4b, 7b variants - Note OpenAI as current default but recommend Ollama switch - Add 'need to install extra software' warning for Ollama - Remove incorrect latency numbers from search-complexity recommendations	2025-08-04 20:01:23 -07:00
Andy Lee	d9b6f195c5	docs: Improve configuration guide based on feedback - List specific files in default data/ directory (2 AI papers, literature, tech report) - Update examples to use English and better RAG-suitable queries - Change full dataset reference to use --max-items -1 - Adjust small model guidance about upgrading to larger models when time allows - Update top-k defaults to reflect actual default of 20 - Ensure consistent use of full model name Qwen/Qwen3-Embedding-0.6B - Reorder optimization steps, move MLX to third position - Remove incorrect chunk size tuning guidance - Change README from 'Having trouble' to 'Need best practices'	2025-08-04 19:29:17 -07:00
Andy Lee	00f506c0bd	docs: Adjust DiskANN positioning in features and roadmap - features.md: Put HNSW/FAISS first as default, DiskANN as optional - roadmap.md: Reorder to show HNSW integration before DiskANN - Consistent with positioning DiskANN as advanced option for large-scale use	2025-08-04 17:53:27 -07:00
Andy Lee	e872dd1d23	docs: Weaken DiskANN emphasis in README - Change backend description to emphasize HNSW as default - DiskANN positioned as optional for billion-scale datasets - Simplify evaluation commands to be more generic	2025-08-04 17:51:21 -07:00
Andy Lee	063c687ff7	chore: move evaluation data .gitattributes to correct location	2025-08-04 17:46:17 -07:00
Andy Lee	bb8ecd54d7	feat: add comprehensive configuration guide and update README - Create docs/configuration-guide.md with detailed guidance on: - Embedding model selection (small/medium/large) - Index selection (HNSW vs DiskANN) - LLM engine and model comparison - Parameter tuning (build/search complexity, top-k) - Performance optimization tips - Deep dive into LEANN's recomputation feature - Update README.md to link to the configuration guide - Include latest 2025 model recommendations (Qwen3, DeepSeek-R1, O3-mini)	2025-08-04 17:41:27 -07:00
Andy Lee	716217ae24	docs: config guidance	2025-08-04 16:21:13 -07:00