Compare commits

..

11 Commits

Author SHA1 Message Date
Andy Lee
b55eeeae5f Merge remote-tracking branch 'origin/main' into feature/claude-code-research 2025-08-05 23:02:00 -07:00
Andy Lee
e890b2311f feat: Add Claude Code integration with MCP server 2025-08-05 14:03:36 -07:00
Andy Lee
f3d99fd118 feat: Claude Code integration ready - LEANN CLI works out of the box
- Verified LEANN CLI works perfectly with Claude Code
- Added integration guide with working examples
- Documented simple workflow for immediate use

Key findings:
- No code changes needed
- Just need --recompute-embeddings flag
- Search, ask, and build all work
- Ready for Claude Code agents and workflows
2025-08-05 12:27:58 -07:00
Andy Lee
8eee90bf80 docs: add a link 2025-08-04 20:10:14 -07:00
Andy Lee
649d4ad03e docs: Address all configuration guide feedback
- Fix grammar: 'If time is not a constraint' instead of 'time expense is not large'
- Highlight Qwen3-Embedding-0.6B performance (nearly OpenAI API level)
- Add OpenAI quick start section with configuration example
- Fold Cloud vs Local trade-offs into collapsible section
- Update HNSW as 'default and recommended for extreme low storage'
- Add DiskANN beta warning and explain PQ+rerank architecture
- Expand Ollama models: add qwen3:0.6b, 4b, 7b variants
- Note OpenAI as current default but recommend Ollama switch
- Add 'need to install extra software' warning for Ollama
- Remove incorrect latency numbers from search-complexity recommendations
2025-08-04 20:01:23 -07:00
Andy Lee
d9b6f195c5 docs: Improve configuration guide based on feedback
- List specific files in default data/ directory (2 AI papers, literature, tech report)
- Update examples to use English and better RAG-suitable queries
- Change full dataset reference to use --max-items -1
- Adjust small model guidance about upgrading to larger models when time allows
- Update top-k defaults to reflect actual default of 20
- Ensure consistent use of full model name Qwen/Qwen3-Embedding-0.6B
- Reorder optimization steps, move MLX to third position
- Remove incorrect chunk size tuning guidance
- Change README from 'Having trouble' to 'Need best practices'
2025-08-04 19:29:17 -07:00
Andy Lee
00f506c0bd docs: Adjust DiskANN positioning in features and roadmap
- features.md: Put HNSW/FAISS first as default, DiskANN as optional
- roadmap.md: Reorder to show HNSW integration before DiskANN
- Consistent with positioning DiskANN as advanced option for large-scale use
2025-08-04 17:53:27 -07:00
Andy Lee
e872dd1d23 docs: Weaken DiskANN emphasis in README
- Change backend description to emphasize HNSW as default
- DiskANN positioned as optional for billion-scale datasets
- Simplify evaluation commands to be more generic
2025-08-04 17:51:21 -07:00
Andy Lee
063c687ff7 chore: move evaluation data .gitattributes to correct location 2025-08-04 17:46:17 -07:00
Andy Lee
bb8ecd54d7 feat: add comprehensive configuration guide and update README
- Create docs/configuration-guide.md with detailed guidance on:
  - Embedding model selection (small/medium/large)
  - Index selection (HNSW vs DiskANN)
  - LLM engine and model comparison
  - Parameter tuning (build/search complexity, top-k)
  - Performance optimization tips
  - Deep dive into LEANN's recomputation feature
- Update README.md to link to the configuration guide
- Include latest 2025 model recommendations (Qwen3, DeepSeek-R1, O3-mini)
2025-08-04 17:41:27 -07:00
Andy Lee
716217ae24 docs: config guidance 2025-08-04 16:21:13 -07:00
6 changed files with 75 additions and 108 deletions

View File

@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
[project] [project]
name = "leann-backend-diskann" name = "leann-backend-diskann"
version = "0.2.2" version = "0.2.1"
dependencies = ["leann-core==0.2.2", "numpy", "protobuf>=3.19.0"] dependencies = ["leann-core==0.2.1", "numpy", "protobuf>=3.19.0"]
[tool.scikit-build] [tool.scikit-build]
# Key: simplified CMake path # Key: simplified CMake path

View File

@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
[project] [project]
name = "leann-backend-hnsw" name = "leann-backend-hnsw"
version = "0.2.2" version = "0.2.1"
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit." description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
dependencies = [ dependencies = [
"leann-core==0.2.2", "leann-core==0.2.1",
"numpy", "numpy",
"pyzmq>=23.0.0", "pyzmq>=23.0.0",
"msgpack>=1.0.0", "msgpack>=1.0.0",

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "leann-core" name = "leann-core"
version = "0.2.2" version = "0.2.1"
description = "Core API and plugin system for LEANN" description = "Core API and plugin system for LEANN"
readme = "README.md" readme = "README.md"
requires-python = ">=3.9" requires-python = ">=3.9"

View File

@@ -75,7 +75,6 @@ class LeannCLI:
epilog=""" epilog="""
Examples: Examples:
leann build my-docs --docs ./documents # Build index named my-docs leann build my-docs --docs ./documents # Build index named my-docs
leann build my-ppts --docs ./ --file-types .pptx,.pdf # Index only PowerPoint and PDF files
leann search my-docs "query" # Search in my-docs index leann search my-docs "query" # Search in my-docs index
leann ask my-docs "question" # Ask my-docs index leann ask my-docs "question" # Ask my-docs index
leann list # List all stored indexes leann list # List all stored indexes
@@ -100,11 +99,6 @@ Examples:
build_parser.add_argument("--num-threads", type=int, default=1) build_parser.add_argument("--num-threads", type=int, default=1)
build_parser.add_argument("--compact", action="store_true", default=True) build_parser.add_argument("--compact", action="store_true", default=True)
build_parser.add_argument("--recompute", action="store_true", default=True) build_parser.add_argument("--recompute", action="store_true", default=True)
build_parser.add_argument(
"--file-types",
type=str,
help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
)
# Search command # Search command
search_parser = subparsers.add_parser("search", help="Search documents") search_parser = subparsers.add_parser("search", help="Search documents")
@@ -114,12 +108,7 @@ Examples:
search_parser.add_argument("--complexity", type=int, default=64) search_parser.add_argument("--complexity", type=int, default=64)
search_parser.add_argument("--beam-width", type=int, default=1) search_parser.add_argument("--beam-width", type=int, default=1)
search_parser.add_argument("--prune-ratio", type=float, default=0.0) search_parser.add_argument("--prune-ratio", type=float, default=0.0)
search_parser.add_argument( search_parser.add_argument("--recompute-embeddings", action="store_true")
"--recompute-embeddings",
action="store_true",
default=True,
help="Recompute embeddings (default: True)",
)
search_parser.add_argument( search_parser.add_argument(
"--pruning-strategy", "--pruning-strategy",
choices=["global", "local", "proportional"], choices=["global", "local", "proportional"],
@@ -142,12 +131,7 @@ Examples:
ask_parser.add_argument("--complexity", type=int, default=32) ask_parser.add_argument("--complexity", type=int, default=32)
ask_parser.add_argument("--beam-width", type=int, default=1) ask_parser.add_argument("--beam-width", type=int, default=1)
ask_parser.add_argument("--prune-ratio", type=float, default=0.0) ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
ask_parser.add_argument( ask_parser.add_argument("--recompute-embeddings", action="store_true")
"--recompute-embeddings",
action="store_true",
default=True,
help="Recompute embeddings (default: True)",
)
ask_parser.add_argument( ask_parser.add_argument(
"--pruning-strategy", "--pruning-strategy",
choices=["global", "local", "proportional"], choices=["global", "local", "proportional"],
@@ -270,10 +254,8 @@ Examples:
print(f' leann search {example_name} "your query"') print(f' leann search {example_name} "your query"')
print(f" leann ask {example_name} --interactive") print(f" leann ask {example_name} --interactive")
def load_documents(self, docs_dir: str, custom_file_types: str | None = None): def load_documents(self, docs_dir: str):
print(f"Loading documents from {docs_dir}...") print(f"Loading documents from {docs_dir}...")
if custom_file_types:
print(f"Using custom file types: {custom_file_types}")
# Try to use better PDF parsers first # Try to use better PDF parsers first
documents = [] documents = []
@@ -305,19 +287,11 @@ Examples:
documents.extend(default_docs) documents.extend(default_docs)
# Load other file types with default reader # Load other file types with default reader
if custom_file_types:
# Parse custom file types from comma-separated string
code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
# Ensure extensions start with a dot
code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
else:
# Use default supported file types
code_extensions = [ code_extensions = [
# Original document types # Original document types
".txt", ".txt",
".md", ".md",
".docx", ".docx",
".pptx",
# Code files for Claude Code integration # Code files for Claude Code integration
".py", ".py",
".js", ".js",
@@ -366,8 +340,6 @@ Examples:
".py", ".py",
".jl", ".jl",
] ]
# Try to load other file types, but don't fail if none are found
try:
other_docs = SimpleDirectoryReader( other_docs = SimpleDirectoryReader(
docs_dir, docs_dir,
recursive=True, recursive=True,
@@ -375,11 +347,6 @@ Examples:
required_exts=code_extensions, required_exts=code_extensions,
).load_data(show_progress=True) ).load_data(show_progress=True)
documents.extend(other_docs) documents.extend(other_docs)
except ValueError as e:
if "No files found" in str(e):
print("No additional files found for other supported types.")
else:
raise e
all_texts = [] all_texts = []
@@ -457,7 +424,7 @@ Examples:
print(f"Index '{index_name}' already exists. Use --force to rebuild.") print(f"Index '{index_name}' already exists. Use --force to rebuild.")
return return
all_texts = self.load_documents(docs_dir, args.file_types) all_texts = self.load_documents(docs_dir)
if not all_texts: if not all_texts:
print("No documents found") print("No documents found")
return return

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "leann" name = "leann"
version = "0.2.2" version = "0.2.1"
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!" description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
readme = "README.md" readme = "README.md"
requires-python = ">=3.9" requires-python = ">=3.9"