Compare commits


7 Commits

- **GitHub Actions** · `075d4bd167` · chore: release v0.2.2 · 2025-08-08 01:58:40 +00:00
- **yichuan520030910320** · `e4bcc76f88` · fix cli & make recompute default true · 2025-08-07 18:58:04 -07:00
- **yichuan520030910320** · `710e83b1fd` · fix cli if there is no other type of doc to make it robust · 2025-08-07 18:46:05 -07:00
- **yichuan520030910320** · `c96d653072` · more support for type of docs in cli · 2025-08-07 18:14:03 -07:00
- **Andy Lee** · `8b22d2b5d3` · Merge pull request #19 from yichuan-w/feature/claude-code-research ("Feature/claude code research") · 2025-08-05 23:02:34 -07:00
- **Andy Lee** · `4cb544ee38` · docs: Update co-contributors with GitHub usernames (#18) · 2025-08-05 17:43:59 -07:00
  - docs: Use GitHub links for co-contributors and improve order
  - docs: Change to Contributors and use personal homepage
  - docs: Specify core contributors and welcome new contributors
- **yichuan520030910320** · `f94ce63d51` · add gpt oss! serve your RAG using ollama · 2025-08-05 16:49:52 -07:00
16 changed files with 902 additions and 60 deletions

View File

@@ -18,6 +18,8 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg
**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
> **🚀 NEW: Claude Code Integration!** LEANN now provides native MCP integration for Claude Code users. Index your codebase and get intelligent code assistance directly in Claude Code. [Setup Guide →](packages/leann-mcp/README.md)
## Why LEANN?
@@ -166,7 +168,7 @@ ollama pull llama3.2:1b
</details>
### Flexible Configuration
LEANN provides flexible parameters for embedding models, search strategies, and data processing to fit your specific needs.
@@ -191,6 +193,7 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
# LLM Parameters (Text generation models)
--llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
--llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
--thinking-budget LEVEL # Thinking budget for reasoning models: low/medium/high (supported by o3, o3-mini, GPT-Oss:20b, and other reasoning models)
# Search Parameters
--top-k N # Number of results to retrieve (default: 20)
@@ -427,7 +430,7 @@ source .venv/bin/activate
leann --help
```
**To make it globally available:**
```bash
# Install the LEANN CLI globally using uv tool
uv tool install leann
@@ -436,12 +439,17 @@ uv tool install leann
leann --help
```
> **Note**: Global installation is required for Claude Code integration. The `leann_mcp` server depends on the globally available `leann` command.
### Usage Examples
```bash
# Build an index from current directory (default)
leann build my-docs
# Or from specific directory
leann build my-docs --docs ./documents
# Search your documents

View File

@@ -100,6 +100,13 @@ class BaseRAGExample(ABC):
default="http://localhost:11434", default="http://localhost:11434",
help="Host for Ollama API (default: http://localhost:11434)", help="Host for Ollama API (default: http://localhost:11434)",
) )
llm_group.add_argument(
"--thinking-budget",
type=str,
choices=["low", "medium", "high"],
default=None,
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
)
# Search parameters
search_group = parser.add_argument_group("Search Parameters")
@@ -228,7 +235,17 @@ class BaseRAGExample(ABC):
if not query:
continue
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if hasattr(args, "thinking_budget") and args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
query,
top_k=args.top_k,
complexity=args.search_complexity,
llm_kwargs=llm_kwargs,
)
print(f"\nAssistant: {response}\n") print(f"\nAssistant: {response}\n")
except KeyboardInterrupt: except KeyboardInterrupt:
@@ -247,7 +264,15 @@ class BaseRAGExample(ABC):
)
print(f"\n[Query]: \033[36m{query}\033[0m")
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if hasattr(args, "thinking_budget") and args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
query, top_k=args.top_k, complexity=args.search_complexity, llm_kwargs=llm_kwargs
)
print(f"\n[Response]: \033[36m{response}\033[0m") print(f"\n[Response]: \033[36m{response}\033[0m")
async def run(self): async def run(self):

View File

Binary image file added (73 KiB; not shown).

View File

@@ -0,0 +1,123 @@
# Thinking Budget Feature Implementation
## Overview
This document describes the implementation of the **thinking budget** feature for LEANN, which allows users to control the computational effort for reasoning models like GPT-Oss:20b.
## Feature Description
The thinking budget feature provides three levels of computational effort for reasoning models:
- **`low`**: Fast responses, basic reasoning (default for simple queries)
- **`medium`**: Balanced speed and reasoning depth
- **`high`**: Maximum reasoning effort, best for complex analytical questions
## Implementation Details
### 1. Command Line Interface
Added `--thinking-budget` parameter to both CLI and RAG examples:
```bash
# LEANN CLI
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
# RAG Examples
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
python apps/document_rag.py --llm openai --llm-model o3 --thinking-budget medium
```
### 2. LLM Backend Support
#### Ollama Backend (`packages/leann-core/src/leann/chat.py`)
```python
def ask(self, prompt: str, **kwargs) -> str:
# Handle thinking budget for reasoning models
options = kwargs.copy()
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget:
options.pop("thinking_budget", None)
if thinking_budget in ["low", "medium", "high"]:
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
```
**API Format**: Uses Ollama's `reasoning` parameter with `effort` and `exclude` fields.
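To make the mapping concrete, here is a sketch of the request body this produces for Ollama's `/api/generate` endpoint (model name and prompt are placeholders; whether the `reasoning` option takes effect depends on the model):
```python
# Sketch: payload the Ollama backend builds when --thinking-budget high is set.
# thinking_budget itself is stripped out; only the mapped "reasoning" option is forwarded.
payload = {
    "model": "gpt-oss:20b",
    "prompt": "<retrieved context + user question>",
    "stream": False,
    "options": {
        "reasoning": {"effort": "high", "exclude": False},
    },
}
```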
#### OpenAI Backend (`packages/leann-core/src/leann/chat.py`)
```python
def ask(self, prompt: str, **kwargs) -> str:
# Handle thinking budget for reasoning models
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
# Check if this is an o-series model
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
if any(model in self.model for model in o_series_models):
params["reasoning_effort"] = thinking_budget
```
**API Format**: Uses OpenAI's `reasoning_effort` parameter for o-series models.
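For comparison, a sketch of the `chat.completions.create` parameters this produces for an o-series model (values are illustrative; non o-series models fall back to `max_tokens` and skip `reasoning_effort`):
```python
# Sketch: parameters sent to OpenAI for an o-series model with --thinking-budget medium.
# o-series models use max_completion_tokens and only accept the default temperature.
params = {
    "model": "o3-mini",
    "messages": [{"role": "user", "content": "<prompt>"}],
    "max_completion_tokens": 1000,
    "temperature": 1.0,
    "reasoning_effort": "medium",
}
```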
### 3. Parameter Propagation
The thinking budget parameter is propagated through the LEANN architecture as follows (a minimal sketch appears after the list):
1. **CLI** (`packages/leann-core/src/leann/cli.py`): Captures `--thinking-budget` argument
2. **Base RAG** (`apps/base_rag_example.py`): Adds parameter to argument parser
3. **LeannChat** (`packages/leann-core/src/leann/api.py`): Passes `llm_kwargs` to LLM
4. **LLM Interface**: Handles the parameter in backend-specific implementations
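A minimal sketch of that flow, using the same argument and keyword names as the snippets above (the surrounding `args`, `chat`, and `query` objects are assumed to exist):
```python
# Sketch: how --thinking-budget travels from parsed CLI args into the LLM call.
llm_kwargs = {}
if getattr(args, "thinking_budget", None):
    llm_kwargs["thinking_budget"] = args.thinking_budget  # "low" | "medium" | "high"

# LeannChat.ask forwards llm_kwargs to the configured backend, which maps
# thinking_budget to Ollama's "reasoning" option or OpenAI's "reasoning_effort".
response = chat.ask(
    query,
    top_k=args.top_k,
    complexity=args.search_complexity,
    llm_kwargs=llm_kwargs,
)
```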
## Files Modified
### Core Implementation
- `packages/leann-core/src/leann/chat.py`: Added thinking budget support to OllamaChat and OpenAIChat
- `packages/leann-core/src/leann/cli.py`: Added `--thinking-budget` argument
- `apps/base_rag_example.py`: Added thinking budget parameter to RAG examples
### Documentation
- `README.md`: Added thinking budget parameter to usage examples
- `docs/configuration-guide.md`: Added detailed documentation and usage guidelines
### Examples
- `examples/thinking_budget_demo.py`: Comprehensive demo script with usage examples
## Usage Examples
### Basic Usage
```bash
# High reasoning effort for complex questions
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
# Medium reasoning for balanced performance
leann ask my-index --llm openai --model o3-mini --thinking-budget medium
# Low reasoning for fast responses
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget low
```
### RAG Examples
```bash
# Email RAG with high reasoning
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
# Document RAG with medium reasoning
python apps/document_rag.py --llm openai --llm-model o3 --thinking-budget medium
```
## Supported Models
### Ollama Models
- **GPT-Oss:20b**: Primary target model with reasoning capabilities
- **Other reasoning models**: Any Ollama model that supports the `reasoning` parameter
### OpenAI Models
- **o3, o3-mini, o4-mini, o1**: o-series reasoning models with `reasoning_effort` parameter
- **GPT-OSS models**: Models that support reasoning capabilities
## Testing
The implementation includes comprehensive testing:
- Parameter handling verification
- Backend-specific API format validation
- CLI argument parsing tests
- Integration with existing LEANN architecture

View File

@@ -0,0 +1,150 @@
# Claude Code x LEANN Integration Guide
## ✅ Status: It Already Works!
Good news: the LEANN CLI already works in Claude Code out of the box, with no modifications needed!
## 🚀 Get Started Now
### 1. Activate the Environment
```bash
# From the LEANN project directory
source .venv/bin/activate.fish # fish shell
# or
source .venv/bin/activate # bash shell
```
### 2. Basic Commands
#### List existing indexes
```bash
leann list
```
#### Search documents
```bash
leann search my-docs "machine learning" --recompute-embeddings
```
#### Question answering
```bash
echo "What is machine learning?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
```
#### Build a new index
```bash
leann build project-docs --docs ./src --recompute-embeddings
```
## 💡 Claude Code Usage Tips
### Use it directly in Claude Code
1. **Activate the environment**
```bash
cd /Users/andyl/Projects/LEANN-RAG
source .venv/bin/activate.fish
```
2. **Search the codebase**
```bash
leann search my-docs "authentication patterns" --recompute-embeddings --top-k 10
```
3. **Ask questions**
```bash
echo "How does the authentication system work?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
```
### Batch operation examples
```bash
# Build an index of the project docs
leann build project-docs --docs ./docs --force
# Search several keywords
leann search project-docs "API authentication" --recompute-embeddings
leann search project-docs "database schema" --recompute-embeddings
leann search project-docs "deployment guide" --recompute-embeddings
# Q&A mode
echo "What are the API endpoints?" | leann ask project-docs --recompute-embeddings
```
## 🎯 Workflows Claude Can Run Right Away
### Code analysis workflow
```bash
# 1. Build a codebase index
leann build codebase --docs ./src --backend hnsw --recompute-embeddings
# 2. Analyze the architecture
echo "What is the overall architecture?" | leann ask codebase --recompute-embeddings
# 3. Find specific functionality
leann search codebase "user authentication" --recompute-embeddings --top-k 5
# 4. Understand implementation details
echo "How is user authentication implemented?" | leann ask codebase --recompute-embeddings
```
### Documentation workflow
```bash
# 1. Index the project docs
leann build docs --docs ./docs --recompute-embeddings
# 2. Look up information quickly
leann search docs "installation requirements" --recompute-embeddings
# 3. Get detailed explanations
echo "What are the system requirements?" | leann ask docs --recompute-embeddings
```
## ⚠️ Important Notes
1. **`--recompute-embeddings` is required** - this is the key flag; omitting it causes an error
2. **Activate the virtual environment first** - make sure LEANN's Python environment is available
3. **Ollama must be installed in advance** - the ask command needs a local LLM
## 🔥 Ready-to-Use Claude Prompt
```
Help me analyze this codebase using LEANN:
1. First, activate the environment:
cd /Users/andyl/Projects/LEANN-RAG && source .venv/bin/activate.fish
2. Build an index of the source code:
leann build codebase --docs ./src --recompute-embeddings
3. Search for authentication patterns:
leann search codebase "authentication middleware" --recompute-embeddings --top-k 10
4. Ask about the authentication system:
echo "How does user authentication work in this codebase?" | leann ask codebase --recompute-embeddings
Please execute these commands and help me understand the code structure.
```
## 📈 Next Improvements
It already works, but a few things could still be improved:
1. **Simpler commands** - enable recompute-embeddings by default
2. **Config file** - avoid retyping parameters
3. **State management** - auto-detect the environment and indexes
4. **Output format** - output that is easier for Claude to parse
These are nice-to-haves; it is fully usable today!
## 🎉 Summary
**LEANN works in Claude Code today:**
- ✅ Search works
- ✅ RAG question answering works
- ✅ Index building works
- ✅ Multiple data sources supported
- ✅ Local LLMs supported
Just remember to add the `--recompute-embeddings` flag!

View File

@@ -103,13 +103,15 @@ For immediate testing without local model downloads:
**OpenAI** (`--llm openai`)
- **Pros**: Best quality, consistent performance, no local resources needed
- **Cons**: Costs money ($0.15-2.5 per million tokens), requires internet, data privacy concerns
- **Models**: `gpt-4o-mini` (fast, cheap), `gpt-4o` (best quality), `o3` (reasoning), `o3-mini` (reasoning, cheaper)
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for o-series reasoning models (o3, o3-mini, o4-mini)
- **Note**: Our current default, but we recommend switching to Ollama for most use cases
**Ollama** (`--llm ollama`)
- **Pros**: Fully local, free, privacy-preserving, good model variety
- **Cons**: Requires local GPU/CPU resources, slower than cloud APIs, need to install extra [ollama app](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) and pre-download models by `ollama pull`
- **Models**: `qwen3:0.6b` (ultra-fast), `qwen3:1.7b` (balanced), `qwen3:4b` (good quality), `qwen3:7b` (high quality), `deepseek-r1:1.5b` (reasoning)
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for reasoning models like GPT-Oss:20b
**HuggingFace** (`--llm hf`)
- **Pros**: Free tier available, huge model selection, direct model loading (vs Ollama's server-based approach)
@@ -151,6 +153,36 @@ For immediate testing without local model downloads:
- LLM processing time ∝ top_k × chunk_size
- Total context = top_k × chunk_size tokens
### Thinking Budget for Reasoning Models
**`--thinking-budget`** (reasoning effort level)
- Controls the computational effort for reasoning models
- Options: `low`, `medium`, `high`
- Guidelines:
- `low`: Fast responses, basic reasoning (default for simple queries)
- `medium`: Balanced speed and reasoning depth
- `high`: Maximum reasoning effort, best for complex analytical questions
- **Supported Models**:
- **Ollama**: `gpt-oss:20b`, `gpt-oss:120b`
- **OpenAI**: `o3`, `o3-mini`, `o4-mini`, `o1` (o-series reasoning models)
- **Note**: Models without reasoning support will show a warning and proceed without reasoning parameters
- **Example**: `--thinking-budget high` for complex analytical questions
**📖 For detailed usage examples and implementation details, check out [Thinking Budget Documentation](THINKING_BUDGET_FEATURE.md)**
**💡 Quick Examples:**
```bash
# OpenAI o-series reasoning model
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
--index-dir hnswbuild --backend hnsw \
--llm openai --llm-model o3 --thinking-budget medium
# Ollama reasoning model
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
--index-dir hnswbuild --backend hnsw \
--llm ollama --llm-model gpt-oss:20b --thinking-budget high
```
### Graph Degree (HNSW/DiskANN)
**`--graph-degree`**

View File

@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
[project]
name = "leann-backend-diskann"
version = "0.2.2"
dependencies = ["leann-core==0.2.2", "numpy", "protobuf>=3.19.0"]
[tool.scikit-build]
# Key: simplified CMake path

View File

@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
[project]
name = "leann-backend-hnsw"
version = "0.2.2"
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
dependencies = [
"leann-core==0.2.2",
"numpy",
"pyzmq>=23.0.0",
"msgpack>=1.0.0",

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "leann-core"
version = "0.2.2"
description = "Core API and plugin system for LEANN"
readme = "README.md"
requires-python = ">=3.9"
@@ -44,6 +44,7 @@ colab = [
[project.scripts]
leann = "leann.cli:main"
leann_mcp = "leann.mcp:main"
[tool.setuptools.packages.find]
where = ["src"]

View File

@@ -489,11 +489,35 @@ class OllamaChat(LLMInterface):
import requests
full_url = f"{self.host}/api/generate"
# Handle thinking budget for reasoning models
options = kwargs.copy()
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget:
# Remove thinking_budget from options as it's not a standard Ollama option
options.pop("thinking_budget", None)
# Only apply reasoning parameters to models that support it
reasoning_supported_models = [
"gpt-oss:20b",
"gpt-oss:120b",
"deepseek-r1",
"deepseek-coder",
]
if thinking_budget in ["low", "medium", "high"]:
if any(model in self.model.lower() for model in reasoning_supported_models):
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
logger.info(f"Applied reasoning effort={thinking_budget} to model {self.model}")
else:
logger.warning(
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
)
payload = {
"model": self.model,
"prompt": prompt,
"stream": False, # Keep it simple for now
"options": options,
}
logger.debug(f"Sending request to Ollama: {payload}")
try:
@@ -684,11 +708,38 @@ class OpenAIChat(LLMInterface):
params = {
"model": self.model,
"messages": [{"role": "user", "content": prompt}],
"temperature": kwargs.get("temperature", 0.7),
}
# Handle max_tokens vs max_completion_tokens based on model
max_tokens = kwargs.get("max_tokens", 1000)
if "o3" in self.model or "o4" in self.model or "o1" in self.model:
# o-series models use max_completion_tokens
params["max_completion_tokens"] = max_tokens
params["temperature"] = 1.0
else:
# Other models use max_tokens
params["max_tokens"] = max_tokens
# Handle thinking budget for reasoning models
thinking_budget = kwargs.get("thinking_budget")
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
# Check if this is an o-series model (partial match for model names)
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
if any(model in self.model for model in o_series_models):
# Use the correct OpenAI reasoning parameter format
params["reasoning_effort"] = thinking_budget
logger.info(f"Applied reasoning_effort={thinking_budget} to model {self.model}")
else:
logger.warning(
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
)
# Add other kwargs (excluding thinking_budget as it's handled above)
for k, v in kwargs.items():
if k not in ["max_tokens", "temperature", "thinking_budget"]:
params[k] = v
logger.info(f"Sending request to OpenAI with model {self.model}") logger.info(f"Sending request to OpenAI with model {self.model}")
try: try:

View File

@@ -41,13 +41,23 @@ def extract_pdf_text_with_pdfplumber(file_path: str) -> str:
class LeannCLI:
def __init__(self):
# Always use project-local .leann directory (like .git)
self.indexes_dir = Path.cwd() / ".leann" / "indexes"
self.indexes_dir.mkdir(parents=True, exist_ok=True)
# Default parser for documents
self.node_parser = SentenceSplitter(
chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
)
# Code-optimized parser
self.code_parser = SentenceSplitter(
chunk_size=512, # Larger chunks for code context
chunk_overlap=50, # Less overlap to preserve function boundaries
separator="\n", # Split by lines for code
paragraph_separator="\n\n", # Preserve logical code blocks
)
def get_index_path(self, index_name: str) -> str:
index_dir = self.indexes_dir / index_name
return str(index_dir / "documents.leann")
@@ -64,10 +74,11 @@ class LeannCLI:
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
leann build my-docs --docs ./documents # Build index named my-docs
leann build my-ppts --docs ./ --file-types .pptx,.pdf # Index only PowerPoint and PDF files
leann search my-docs "query" # Search in my-docs index
leann ask my-docs "question" # Ask my-docs index
leann list # List all stored indexes
""",
)
@@ -76,7 +87,9 @@ Examples:
# Build command
build_parser = subparsers.add_parser("build", help="Build document index")
build_parser.add_argument("index_name", help="Index name")
build_parser.add_argument(
"--docs", type=str, default=".", help="Documents directory (default: current directory)"
)
build_parser.add_argument(
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
)
@@ -87,6 +100,11 @@ Examples:
build_parser.add_argument("--num-threads", type=int, default=1) build_parser.add_argument("--num-threads", type=int, default=1)
build_parser.add_argument("--compact", action="store_true", default=True) build_parser.add_argument("--compact", action="store_true", default=True)
build_parser.add_argument("--recompute", action="store_true", default=True) build_parser.add_argument("--recompute", action="store_true", default=True)
build_parser.add_argument(
"--file-types",
type=str,
help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
)
# Search command
search_parser = subparsers.add_parser("search", help="Search documents")
@@ -96,7 +114,12 @@ Examples:
search_parser.add_argument("--complexity", type=int, default=64) search_parser.add_argument("--complexity", type=int, default=64)
search_parser.add_argument("--beam-width", type=int, default=1) search_parser.add_argument("--beam-width", type=int, default=1)
search_parser.add_argument("--prune-ratio", type=float, default=0.0) search_parser.add_argument("--prune-ratio", type=float, default=0.0)
search_parser.add_argument("--recompute-embeddings", action="store_true") search_parser.add_argument(
"--recompute-embeddings",
action="store_true",
default=True,
help="Recompute embeddings (default: True)",
)
search_parser.add_argument(
"--pruning-strategy",
choices=["global", "local", "proportional"],
@@ -119,52 +142,138 @@ Examples:
ask_parser.add_argument("--complexity", type=int, default=32) ask_parser.add_argument("--complexity", type=int, default=32)
ask_parser.add_argument("--beam-width", type=int, default=1) ask_parser.add_argument("--beam-width", type=int, default=1)
ask_parser.add_argument("--prune-ratio", type=float, default=0.0) ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
ask_parser.add_argument("--recompute-embeddings", action="store_true") ask_parser.add_argument(
"--recompute-embeddings",
action="store_true",
default=True,
help="Recompute embeddings (default: True)",
)
ask_parser.add_argument(
"--pruning-strategy",
choices=["global", "local", "proportional"],
default="global",
)
ask_parser.add_argument(
"--thinking-budget",
type=str,
choices=["low", "medium", "high"],
default=None,
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
)
# List command
subparsers.add_parser("list", help="List all indexes")
return parser
def register_project_dir(self):
"""Register current project directory in global registry"""
global_registry = Path.home() / ".leann" / "projects.json"
global_registry.parent.mkdir(exist_ok=True)
current_dir = str(Path.cwd())
# Load existing registry
projects = []
if global_registry.exists():
try:
import json
with open(global_registry) as f:
projects = json.load(f)
except Exception:
projects = []
# Add current directory if not already present
if current_dir not in projects:
projects.append(current_dir)
# Save registry
import json
with open(global_registry, "w") as f:
json.dump(projects, f, indent=2)
def list_indexes(self):
print("Stored LEANN indexes:")
# Get all project directories with .leann
global_registry = Path.home() / ".leann" / "projects.json"
all_projects = []
if global_registry.exists():
try:
import json
with open(global_registry) as f:
all_projects = json.load(f)
except Exception:
pass
# Filter to only existing directories with .leann
valid_projects = []
for project_dir in all_projects:
project_path = Path(project_dir)
if project_path.exists() and (project_path / ".leann" / "indexes").exists():
valid_projects.append(project_path)
# Add current project if it has .leann but not in registry
current_path = Path.cwd()
if (current_path / ".leann" / "indexes").exists() and current_path not in valid_projects:
valid_projects.append(current_path)
if not valid_projects:
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
return
total_indexes = 0
current_dir = Path.cwd()
for project_path in valid_projects:
indexes_dir = project_path / ".leann" / "indexes"
if not indexes_dir.exists():
continue
index_dirs = [d for d in indexes_dir.iterdir() if d.is_dir()]
if not index_dirs:
continue
# Show project header
if project_path == current_dir:
print(f"\n📁 Current project ({project_path}):")
else:
print(f"\n📂 {project_path}:")
for index_dir in index_dirs:
total_indexes += 1
index_name = index_dir.name
meta_file = index_dir / "documents.leann.meta.json"
status = "✅" if meta_file.exists() else "❌"
print(f" {total_indexes}. {index_name} [{status}]")
if status == "✅":
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
1024 * 1024
)
print(f" Size: {size_mb:.1f} MB")
if total_indexes > 0:
print(f"\nTotal: {total_indexes} indexes across {len(valid_projects)} projects")
print("\nUsage (current project only):")
# Show example from current project
current_indexes_dir = current_dir / ".leann" / "indexes"
if current_indexes_dir.exists():
current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
if current_index_dirs:
example_name = current_index_dirs[0].name
print(f' leann search {example_name} "your query"')
print(f" leann ask {example_name} --interactive")
def load_documents(self, docs_dir: str, custom_file_types: str | None = None):
print(f"Loading documents from {docs_dir}...")
if custom_file_types:
print(f"Using custom file types: {custom_file_types}")
# Try to use better PDF parsers first
documents = []
@@ -196,17 +305,140 @@ Examples:
documents.extend(default_docs)
# Load other file types with default reader
if custom_file_types:
# Parse custom file types from comma-separated string
code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
# Ensure extensions start with a dot
code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
else:
# Use default supported file types
code_extensions = [
# Original document types
".txt",
".md",
".docx",
".pptx",
# Code files for Claude Code integration
".py",
".js",
".ts",
".jsx",
".tsx",
".java",
".cpp",
".c",
".h",
".hpp",
".cs",
".go",
".rs",
".rb",
".php",
".swift",
".kt",
".scala",
".r",
".sql",
".sh",
".bash",
".zsh",
".fish",
".ps1",
".bat",
# Config and markup files
".json",
".yaml",
".yml",
".xml",
".toml",
".ini",
".cfg",
".conf",
".html",
".css",
".scss",
".less",
".vue",
".svelte",
# Data science
".ipynb",
".R",
".py",
".jl",
]
# Try to load other file types, but don't fail if none are found
try:
other_docs = SimpleDirectoryReader(
docs_dir,
recursive=True,
encoding="utf-8",
required_exts=code_extensions,
).load_data(show_progress=True)
documents.extend(other_docs)
except ValueError as e:
if "No files found" in str(e):
print("No additional files found for other supported types.")
else:
raise e
all_texts = []
# Define code file extensions for intelligent chunking
code_file_exts = {
".py",
".js",
".ts",
".jsx",
".tsx",
".java",
".cpp",
".c",
".h",
".hpp",
".cs",
".go",
".rs",
".rb",
".php",
".swift",
".kt",
".scala",
".r",
".sql",
".sh",
".bash",
".zsh",
".fish",
".ps1",
".bat",
".json",
".yaml",
".yml",
".xml",
".toml",
".ini",
".cfg",
".conf",
".html",
".css",
".scss",
".less",
".vue",
".svelte",
".ipynb",
".R",
".jl",
}
for doc in documents:
# Check if this is a code file based on source path
source_path = doc.metadata.get("source", "")
is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
# Use appropriate parser based on file type
parser = self.code_parser if is_code_file else self.node_parser
nodes = parser.get_nodes_from_documents([doc])
for node in nodes:
all_texts.append(node.get_content())
@@ -219,11 +451,13 @@ Examples:
index_dir = self.indexes_dir / index_name
index_path = self.get_index_path(index_name)
print(f"📂 Indexing: {Path(docs_dir).resolve()}")
if index_dir.exists() and not args.force:
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
return
all_texts = self.load_documents(docs_dir, args.file_types)
if not all_texts:
print("No documents found")
return
@@ -248,6 +482,9 @@ Examples:
builder.build_index(index_path)
print(f"Index built at {index_path}")
# Register this project directory in global registry
self.register_project_dir()
async def search_documents(self, args):
index_name = args.index_name
query = args.query
@@ -308,6 +545,11 @@ Examples:
if not user_input:
continue
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
user_input,
top_k=args.top_k,
@@ -316,11 +558,17 @@ Examples:
prune_ratio=args.prune_ratio,
recompute_embeddings=args.recompute_embeddings,
pruning_strategy=args.pruning_strategy,
llm_kwargs=llm_kwargs,
)
print(f"LEANN: {response}")
else:
query = input("Enter your question: ").strip()
if query:
# Prepare LLM kwargs with thinking budget if specified
llm_kwargs = {}
if args.thinking_budget:
llm_kwargs["thinking_budget"] = args.thinking_budget
response = chat.ask(
query,
top_k=args.top_k,
@@ -329,6 +577,7 @@ Examples:
prune_ratio=args.prune_ratio,
recompute_embeddings=args.recompute_embeddings,
pruning_strategy=args.pruning_strategy,
llm_kwargs=llm_kwargs,
)
print(f"LEANN: {response}")

View File

@@ -0,0 +1,134 @@
#!/usr/bin/env python3
import json
import os
import subprocess
import sys
def handle_request(request):
if request.get("method") == "initialize":
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"result": {
"capabilities": {"tools": {}},
"protocolVersion": "2024-11-05",
"serverInfo": {"name": "leann-mcp", "version": "1.0.0"},
},
}
elif request.get("method") == "tools/list":
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"result": {
"tools": [
{
"name": "leann_search",
"description": "Search LEANN index",
"inputSchema": {
"type": "object",
"properties": {
"index_name": {"type": "string"},
"query": {"type": "string"},
"top_k": {"type": "integer", "default": 5},
},
"required": ["index_name", "query"],
},
},
{
"name": "leann_ask",
"description": "Ask question using LEANN RAG",
"inputSchema": {
"type": "object",
"properties": {
"index_name": {"type": "string"},
"question": {"type": "string"},
},
"required": ["index_name", "question"],
},
},
{
"name": "leann_list",
"description": "List all LEANN indexes",
"inputSchema": {"type": "object", "properties": {}},
},
]
},
}
elif request.get("method") == "tools/call":
tool_name = request["params"]["name"]
args = request["params"].get("arguments", {})
# Set working directory and environment
env = os.environ.copy()
cwd = "/Users/andyl/Projects/LEANN-RAG"
try:
if tool_name == "leann_search":
cmd = [
"leann",
"search",
args["index_name"],
args["query"],
"--recompute-embeddings",
f"--top-k={args.get('top_k', 5)}",
]
result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, env=env)
elif tool_name == "leann_ask":
cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
result = subprocess.run(
cmd, shell=True, capture_output=True, text=True, cwd=cwd, env=env
)
elif tool_name == "leann_list":
result = subprocess.run(
["leann", "list"], capture_output=True, text=True, cwd=cwd, env=env
)
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"result": {
"content": [
{
"type": "text",
"text": result.stdout
if result.returncode == 0
else f"Error: {result.stderr}",
}
]
},
}
except Exception as e:
return {
"jsonrpc": "2.0",
"id": request.get("id"),
"error": {"code": -1, "message": str(e)},
}
def main():
for line in sys.stdin:
try:
request = json.loads(line.strip())
response = handle_request(request)
if response:
print(json.dumps(response))
sys.stdout.flush()
except Exception as e:
error_response = {
"jsonrpc": "2.0",
"id": None,
"error": {"code": -1, "message": str(e)},
}
print(json.dumps(error_response))
sys.stdout.flush()
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,69 @@
# LEANN Claude Code Integration
Intelligent code assistance using LEANN's vector search directly in Claude Code.
## Prerequisites
First, install LEANN CLI globally:
```bash
uv tool install leann
```
This makes the `leann` command available system-wide, which `leann_mcp` requires.
## Quick Setup
Add the LEANN MCP server to Claude Code:
```bash
claude mcp add leann-server -- leann_mcp
```
## Available Tools
- **`leann_list`** - List available indexes across all projects
- **`leann_search`** - Search code and documents with semantic queries
- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase
## Quick Start
```bash
# Build an index for your project
leann build my-project
# Start Claude Code
claude
```
Then in Claude Code:
```
Help me understand this codebase. List available indexes and search for authentication patterns.
```
<p align="center">
<img src="../../assets/claude_code_leann.png" alt="LEANN in Claude Code" width="80%">
</p>
## How It Works
- **`leann`** - Core CLI tool for indexing and searching (installed globally)
- **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration
- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results
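As an illustration (index name, query, and `top_k` are made up), a `tools/call` request from Claude Code and the command `leann_mcp` runs for it look roughly like this:
```python
# Sketch: an MCP tools/call request for leann_search and the CLI command it maps to.
request = {
    "jsonrpc": "2.0",
    "id": 1,
    "method": "tools/call",
    "params": {
        "name": "leann_search",
        "arguments": {"index_name": "my-project", "query": "authentication patterns", "top_k": 5},
    },
}
# leann_mcp turns this into:
#   leann search my-project "authentication patterns" --recompute-embeddings --top-k=5
# and returns the command's stdout as the tool result text.
```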
## File Support
Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and 30+ more file types.
## Storage
- Project indexes in `.leann/` directory (like `.git`)
- Global project registry at `~/.leann/projects.json`
- Multi-project support built-in
## Removing
```bash
claude mcp remove leann-server
```

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "leann"
version = "0.2.2"
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
readme = "README.md"
requires-python = ">=3.9"

uv.lock generated
View File

@@ -2155,7 +2155,7 @@ wheels = [
[[package]]
name = "leann-backend-diskann"
version = "0.2.1"
source = { editable = "packages/leann-backend-diskann" }
dependencies = [
{ name = "leann-core" },
@@ -2167,14 +2167,14 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "leann-core", specifier = "==0.2.1" },
{ name = "numpy" },
{ name = "protobuf", specifier = ">=3.19.0" },
]
[[package]]
name = "leann-backend-hnsw"
version = "0.2.1"
source = { editable = "packages/leann-backend-hnsw" }
dependencies = [
{ name = "leann-core" },
@@ -2187,7 +2187,7 @@ dependencies = [
[package.metadata]
requires-dist = [
{ name = "leann-core", specifier = "==0.2.1" },
{ name = "msgpack", specifier = ">=1.0.0" },
{ name = "numpy" },
{ name = "pyzmq", specifier = ">=23.0.0" },
@@ -2195,7 +2195,7 @@ requires-dist = [
[[package]]
name = "leann-core"
version = "0.2.1"
source = { editable = "packages/leann-core" }
dependencies = [
{ name = "accelerate" },