Compare commits

4 Commits: `feature/cl…` → `docs/updat…`

| Author | SHA1 | Date |
|---|---|---|
|  | dec55dc5d1 |  |
|  | 5cc724e661 |  |
|  | 57a2ddec3e |  |
|  | c80720c6f2 |  |

README.md (14 changes)
@@ -18,8 +18,6 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg
 **Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
 
-> **🚀 NEW: Claude Code Integration!** LEANN now provides native MCP integration for Claude Code users. Index your codebase and get intelligent code assistance directly in Claude Code. [Setup Guide →](packages/leann-mcp/README.md)
-
 
 ## Why LEANN?
@@ -168,7 +166,7 @@ ollama pull llama3.2:1b
 </details>
 
-### ⭐ Flexible Configuration
+### Flexible Configuration
 
 LEANN provides flexible parameters for embedding models, search strategies, and data processing to fit your specific needs.
 
@@ -193,7 +191,6 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
 # LLM Parameters (Text generation models)
 --llm TYPE               # LLM backend: openai, ollama, or hf (default: openai)
 --llm-model MODEL        # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
---thinking-budget LEVEL  # Thinking budget for reasoning models: low/medium/high (supported by o3, o3-mini, GPT-Oss:20b, and other reasoning models)
 
 # Search Parameters
 --top-k N                # Number of results to retrieve (default: 20)
@@ -430,7 +427,7 @@ source .venv/bin/activate
 leann --help
 ```
 
-**To make it globally available:**
+**To make it globally available (recommended for daily use):**
 ```bash
 # Install the LEANN CLI globally using uv tool
 uv tool install leann
@@ -439,17 +436,12 @@ uv tool install leann
 leann --help
 ```
 
-> **Note**: Global installation is required for Claude Code integration. The `leann_mcp` server depends on the globally available `leann` command.
-
 ### Usage Examples
 
 ```bash
-# Build an index from current directory (default)
-leann build my-docs
-
-# Or from specific directory
+# Build an index from documents
 leann build my-docs --docs ./documents
 
 # Search your documents
apps/base_rag_example.py

@@ -100,13 +100,6 @@ class BaseRAGExample(ABC):
             default="http://localhost:11434",
             help="Host for Ollama API (default: http://localhost:11434)",
         )
-        llm_group.add_argument(
-            "--thinking-budget",
-            type=str,
-            choices=["low", "medium", "high"],
-            default=None,
-            help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
-        )
 
         # Search parameters
         search_group = parser.add_argument_group("Search Parameters")
@@ -235,17 +228,7 @@ class BaseRAGExample(ABC):
                 if not query:
                     continue
 
-                # Prepare LLM kwargs with thinking budget if specified
-                llm_kwargs = {}
-                if hasattr(args, "thinking_budget") and args.thinking_budget:
-                    llm_kwargs["thinking_budget"] = args.thinking_budget
-
-                response = chat.ask(
-                    query,
-                    top_k=args.top_k,
-                    complexity=args.search_complexity,
-                    llm_kwargs=llm_kwargs,
-                )
+                response = chat.ask(query, top_k=args.top_k, complexity=args.search_complexity)
                 print(f"\nAssistant: {response}\n")
 
             except KeyboardInterrupt:
@@ -264,15 +247,7 @@ class BaseRAGExample(ABC):
             )
 
             print(f"\n[Query]: \033[36m{query}\033[0m")
-
-            # Prepare LLM kwargs with thinking budget if specified
-            llm_kwargs = {}
-            if hasattr(args, "thinking_budget") and args.thinking_budget:
-                llm_kwargs["thinking_budget"] = args.thinking_budget
-
-            response = chat.ask(
-                query, top_k=args.top_k, complexity=args.search_complexity, llm_kwargs=llm_kwargs
-            )
+            response = chat.ask(query, top_k=args.top_k, complexity=args.search_complexity)
             print(f"\n[Response]: \033[36m{response}\033[0m")
 
         async def run(self):
Binary file not shown (image deleted; before: 73 KiB).
THINKING_BUDGET_FEATURE.md (file deleted, @@ -1,123 +0,0 @@)

# Thinking Budget Feature Implementation

## Overview

This document describes the implementation of the **thinking budget** feature for LEANN, which allows users to control the computational effort for reasoning models like GPT-Oss:20b.

## Feature Description

The thinking budget feature provides three levels of computational effort for reasoning models:
- **`low`**: Fast responses, basic reasoning (default for simple queries)
- **`medium`**: Balanced speed and reasoning depth
- **`high`**: Maximum reasoning effort, best for complex analytical questions

## Implementation Details

### 1. Command Line Interface

Added a `--thinking-budget` parameter to both the CLI and the RAG examples:

```bash
# LEANN CLI
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high

# RAG Examples
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
python apps/document_rag.py --llm openai --llm-model o3 --thinking-budget medium
```

### 2. LLM Backend Support

#### Ollama Backend (`packages/leann-core/src/leann/chat.py`)

```python
def ask(self, prompt: str, **kwargs) -> str:
    # Handle thinking budget for reasoning models
    options = kwargs.copy()
    thinking_budget = kwargs.get("thinking_budget")
    if thinking_budget:
        options.pop("thinking_budget", None)
        if thinking_budget in ["low", "medium", "high"]:
            options["reasoning"] = {"effort": thinking_budget, "exclude": False}
```

**API Format**: Uses Ollama's `reasoning` parameter with `effort` and `exclude` fields.
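For illustration, the non-streaming request body this produces would look roughly like the following (a sketch assembled from the `OllamaChat` code shown later in this compare; the model and prompt are placeholders):

```python
# Hypothetical payload after thinking_budget="high" is translated into
# Ollama's reasoning option (values here are placeholders):
payload = {
    "model": "gpt-oss:20b",
    "prompt": "Why is the sky blue?",
    "stream": False,
    "options": {"reasoning": {"effort": "high", "exclude": False}},
}
```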
#### OpenAI Backend (`packages/leann-core/src/leann/chat.py`)

```python
def ask(self, prompt: str, **kwargs) -> str:
    # Handle thinking budget for reasoning models
    thinking_budget = kwargs.get("thinking_budget")
    if thinking_budget and thinking_budget in ["low", "medium", "high"]:
        # Check if this is an o-series model
        o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
        if any(model in self.model for model in o_series_models):
            params["reasoning_effort"] = thinking_budget
```

**API Format**: Uses OpenAI's `reasoning_effort` parameter for o-series models.

### 3. Parameter Propagation

The thinking budget parameter is propagated through the LEANN architecture as follows (a condensed sketch of the flow appears after this list):

1. **CLI** (`packages/leann-core/src/leann/cli.py`): Captures the `--thinking-budget` argument
2. **Base RAG** (`apps/base_rag_example.py`): Adds the parameter to the argument parser
3. **LeannChat** (`packages/leann-core/src/leann/api.py`): Passes `llm_kwargs` to the LLM
4. **LLM Interface**: Handles the parameter in backend-specific implementations
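The sketch below condenses the flow; names are taken from the removed code in this compare, `args` is the parsed argparse namespace, and `chat` is a `LeannChat` instance:

```python
# Condensed sketch of the propagation path (not a verbatim excerpt):
def ask_with_budget(chat, args, query: str) -> str:
    llm_kwargs = {}
    if getattr(args, "thinking_budget", None):       # steps 1-2: CLI flag -> kwargs
        llm_kwargs["thinking_budget"] = args.thinking_budget
    return chat.ask(                                 # step 3: LeannChat forwards
        query,
        top_k=args.top_k,
        complexity=args.search_complexity,
        llm_kwargs=llm_kwargs,                       # step 4: backend translates this
    )
```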
## Files Modified

### Core Implementation
- `packages/leann-core/src/leann/chat.py`: Added thinking budget support to OllamaChat and OpenAIChat
- `packages/leann-core/src/leann/cli.py`: Added the `--thinking-budget` argument
- `apps/base_rag_example.py`: Added the thinking budget parameter to the RAG examples

### Documentation
- `README.md`: Added the thinking budget parameter to the usage examples
- `docs/configuration-guide.md`: Added detailed documentation and usage guidelines

### Examples
- `examples/thinking_budget_demo.py`: Comprehensive demo script with usage examples

## Usage Examples

### Basic Usage
```bash
# High reasoning effort for complex questions
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high

# Medium reasoning for balanced performance
leann ask my-index --llm openai --model gpt-4o --thinking-budget medium

# Low reasoning for fast responses
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget low
```

### RAG Examples
```bash
# Email RAG with high reasoning
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high

# Document RAG with medium reasoning
python apps/document_rag.py --llm openai --llm-model gpt-4o --thinking-budget medium
```

## Supported Models

### Ollama Models
- **GPT-Oss:20b**: Primary target model with reasoning capabilities
- **Other reasoning models**: Any Ollama model that supports the `reasoning` parameter

### OpenAI Models
- **o3, o3-mini, o4-mini, o1**: o-series reasoning models with the `reasoning_effort` parameter
- **GPT-OSS models**: Models that support reasoning capabilities

## Testing

The implementation includes comprehensive testing:
- Parameter handling verification
- Backend-specific API format validation
- CLI argument parsing tests
- Integration with the existing LEANN architecture
(file deleted, @@ -1,150 +0,0 @@)

# Claude Code x LEANN Integration Guide

## ✅ Current status: it already works!

Good news: the LEANN CLI already works in Claude Code, with no modifications needed!

## 🚀 Get started right away

### 1. Activate the environment
```bash
# In the LEANN project directory
source .venv/bin/activate.fish  # fish shell
# or
source .venv/bin/activate  # bash shell
```

### 2. Basic commands

#### List existing indexes
```bash
leann list
```

#### Search documents
```bash
leann search my-docs "machine learning" --recompute-embeddings
```

#### Question answering
```bash
echo "What is machine learning?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
```

#### Build a new index
```bash
leann build project-docs --docs ./src --recompute-embeddings
```

## 💡 Tips for Claude Code

### Use it directly inside Claude Code

1. **Activate the environment**:
```bash
cd /Users/andyl/Projects/LEANN-RAG
source .venv/bin/activate.fish
```

2. **Search the codebase**:
```bash
leann search my-docs "authentication patterns" --recompute-embeddings --top-k 10
```

3. **Intelligent Q&A**:
```bash
echo "How does the authentication system work?" | leann ask my-docs --llm ollama --model qwen3:8b --recompute-embeddings
```

### Batch operation examples

```bash
# Build an index of the project docs
leann build project-docs --docs ./docs --force

# Search several keywords
leann search project-docs "API authentication" --recompute-embeddings
leann search project-docs "database schema" --recompute-embeddings
leann search project-docs "deployment guide" --recompute-embeddings

# Q&A mode
echo "What are the API endpoints?" | leann ask project-docs --recompute-embeddings
```

## 🎯 Workflows Claude can run right away

### Code analysis workflow
```bash
# 1. Build a codebase index
leann build codebase --docs ./src --backend hnsw --recompute-embeddings

# 2. Analyze the architecture
echo "What is the overall architecture?" | leann ask codebase --recompute-embeddings

# 3. Find specific functionality
leann search codebase "user authentication" --recompute-embeddings --top-k 5

# 4. Understand implementation details
echo "How is user authentication implemented?" | leann ask codebase --recompute-embeddings
```

### Documentation workflow
```bash
# 1. Index the project documentation
leann build docs --docs ./docs --recompute-embeddings

# 2. Find information quickly
leann search docs "installation requirements" --recompute-embeddings

# 3. Get detailed explanations
echo "What are the system requirements?" | leann ask docs --recompute-embeddings
```

## ⚠️ Important notes

1. **Always pass `--recompute-embeddings`** - this is the key flag; the command errors without it
2. **Activate the virtual environment first** - make sure LEANN's Python environment is available
3. **Ollama must be installed in advance** - the ask feature needs a local LLM

## 🔥 A Claude prompt you can use right now

```
Help me analyze this codebase using LEANN:

1. First, activate the environment:
   cd /Users/andyl/Projects/LEANN-RAG && source .venv/bin/activate.fish

2. Build an index of the source code:
   leann build codebase --docs ./src --recompute-embeddings

3. Search for authentication patterns:
   leann search codebase "authentication middleware" --recompute-embeddings --top-k 10

4. Ask about the authentication system:
   echo "How does user authentication work in this codebase?" | leann ask codebase --recompute-embeddings

Please execute these commands and help me understand the code structure.
```

## 📈 Planned improvements

It is usable today, but a few things could still be optimized:

1. **Simpler commands** - enable recompute-embeddings by default
2. **A config file** - avoid retyping the same parameters
3. **State management** - auto-detect the environment and indexes
4. **Output format** - output that is easier for Claude to parse

These are all icing on the cake; it is ready to use right now!

## 🎉 Summary

**LEANN already works perfectly in Claude Code!**

- ✅ Search works
- ✅ RAG question answering works
- ✅ Index building works
- ✅ Multiple data sources supported
- ✅ Local LLMs supported

Just remember to add the `--recompute-embeddings` flag!
docs/configuration-guide.md

@@ -103,15 +103,13 @@ For immediate testing without local model downloads:
 **OpenAI** (`--llm openai`)
 - **Pros**: Best quality, consistent performance, no local resources needed
 - **Cons**: Costs money ($0.15-2.5 per million tokens), requires internet, data privacy concerns
-- **Models**: `gpt-4o-mini` (fast, cheap), `gpt-4o` (best quality), `o3` (reasoning), `o3-mini` (reasoning, cheaper)
-- **Thinking Budget**: Use `--thinking-budget low/medium/high` for o-series reasoning models (o3, o3-mini, o4-mini)
+- **Models**: `gpt-4o-mini` (fast, cheap), `gpt-4o` (best quality), `o3-mini` (reasoning, not so expensive)
 - **Note**: Our current default, but we recommend switching to Ollama for most use cases
 
 **Ollama** (`--llm ollama`)
 - **Pros**: Fully local, free, privacy-preserving, good model variety
 - **Cons**: Requires local GPU/CPU resources, slower than cloud APIs, need to install extra [ollama app](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) and pre-download models by `ollama pull`
 - **Models**: `qwen3:0.6b` (ultra-fast), `qwen3:1.7b` (balanced), `qwen3:4b` (good quality), `qwen3:7b` (high quality), `deepseek-r1:1.5b` (reasoning)
-- **Thinking Budget**: Use `--thinking-budget low/medium/high` for reasoning models like GPT-Oss:20b
 
 **HuggingFace** (`--llm hf`)
 - **Pros**: Free tier available, huge model selection, direct model loading (vs Ollama's server-based approach)
@@ -153,36 +151,6 @@ For immediate testing without local model downloads:
 - LLM processing time ∝ top_k × chunk_size
 - Total context = top_k × chunk_size tokens
 
-### Thinking Budget for Reasoning Models
-
-**`--thinking-budget`** (reasoning effort level)
-- Controls the computational effort for reasoning models
-- Options: `low`, `medium`, `high`
-- Guidelines:
-  - `low`: Fast responses, basic reasoning (default for simple queries)
-  - `medium`: Balanced speed and reasoning depth
-  - `high`: Maximum reasoning effort, best for complex analytical questions
-- **Supported Models**:
-  - **Ollama**: `gpt-oss:20b`, `gpt-oss:120b`
-  - **OpenAI**: `o3`, `o3-mini`, `o4-mini`, `o1` (o-series reasoning models)
-- **Note**: Models without reasoning support will show a warning and proceed without reasoning parameters
-- **Example**: `--thinking-budget high` for complex analytical questions
-
-**📖 For detailed usage examples and implementation details, check out [Thinking Budget Documentation](THINKING_BUDGET_FEATURE.md)**
-
-**💡 Quick Examples:**
-```bash
-# OpenAI o-series reasoning model
-python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
-  --index-dir hnswbuild --backend hnsw \
-  --llm openai --llm-model o3 --thinking-budget medium
-
-# Ollama reasoning model
-python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
-  --index-dir hnswbuild --backend hnsw \
-  --llm ollama --llm-model gpt-oss:20b --thinking-budget high
-```
-
 ### Graph Degree (HNSW/DiskANN)
 
 **`--graph-degree`**
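To make the context rule kept in the hunk above concrete, a small worked example using defaults that appear elsewhere in this compare:

```python
# Worked example of "Total context = top_k × chunk_size tokens":
top_k = 20        # README: --top-k N (default: 20)
chunk_size = 256  # cli.py: SentenceSplitter(chunk_size=256, ...)
print(top_k * chunk_size)  # 5120 tokens of retrieved context per query
```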
pyproject.toml

@@ -44,7 +44,6 @@ colab = [
 
 [project.scripts]
 leann = "leann.cli:main"
-leann_mcp = "leann.mcp:main"
 
 [tool.setuptools.packages.find]
 where = ["src"]
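Context for this removal: a `[project.scripts]` entry makes the installer generate a console executable that imports and calls the named function, so dropping the `leann_mcp` line removes the `leann_mcp` command. Roughly, the wrapper still generated for `leann` behaves like this sketch of standard entry-point behavior:

```python
# Rough equivalent of the console script generated for leann = "leann.cli:main"
import sys

from leann.cli import main

if __name__ == "__main__":
    sys.exit(main())
```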
packages/leann-core/src/leann/chat.py

@@ -489,35 +489,11 @@ class OllamaChat(LLMInterface):
         import requests
 
         full_url = f"{self.host}/api/generate"
 
-        # Handle thinking budget for reasoning models
-        options = kwargs.copy()
-        thinking_budget = kwargs.get("thinking_budget")
-        if thinking_budget:
-            # Remove thinking_budget from options as it's not a standard Ollama option
-            options.pop("thinking_budget", None)
-            # Only apply reasoning parameters to models that support it
-            reasoning_supported_models = [
-                "gpt-oss:20b",
-                "gpt-oss:120b",
-                "deepseek-r1",
-                "deepseek-coder",
-            ]
-
-            if thinking_budget in ["low", "medium", "high"]:
-                if any(model in self.model.lower() for model in reasoning_supported_models):
-                    options["reasoning"] = {"effort": thinking_budget, "exclude": False}
-                    logger.info(f"Applied reasoning effort={thinking_budget} to model {self.model}")
-                else:
-                    logger.warning(
-                        f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
-                    )
-
         payload = {
             "model": self.model,
             "prompt": prompt,
             "stream": False,  # Keep it simple for now
-            "options": options,
+            "options": kwargs,
         }
         logger.debug(f"Sending request to Ollama: {payload}")
         try:
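With this change, whatever raw `kwargs` the caller passes land in Ollama's `options` field unmodified. For reference, a standalone sketch of the same non-streaming call (assumes a local Ollama server and the `requests` package; the model name is illustrative):

```python
import requests

host = "http://localhost:11434"  # default Ollama host used in base_rag_example.py
payload = {
    "model": "qwen3:1.7b",
    "prompt": "Summarize LEANN in one sentence.",
    "stream": False,
    "options": {"temperature": 0.7},  # after this change, raw kwargs land here
}
resp = requests.post(f"{host}/api/generate", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["response"])  # Ollama returns the completion in "response"
```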
@@ -708,38 +684,11 @@ class OpenAIChat(LLMInterface):
         params = {
             "model": self.model,
             "messages": [{"role": "user", "content": prompt}],
+            "max_tokens": kwargs.get("max_tokens", 1000),
             "temperature": kwargs.get("temperature", 0.7),
+            **{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]},
         }
 
-        # Handle max_tokens vs max_completion_tokens based on model
-        max_tokens = kwargs.get("max_tokens", 1000)
-        if "o3" in self.model or "o4" in self.model or "o1" in self.model:
-            # o-series models use max_completion_tokens
-            params["max_completion_tokens"] = max_tokens
-            params["temperature"] = 1.0
-        else:
-            # Other models use max_tokens
-            params["max_tokens"] = max_tokens
-
-        # Handle thinking budget for reasoning models
-        thinking_budget = kwargs.get("thinking_budget")
-        if thinking_budget and thinking_budget in ["low", "medium", "high"]:
-            # Check if this is an o-series model (partial match for model names)
-            o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
-            if any(model in self.model for model in o_series_models):
-                # Use the correct OpenAI reasoning parameter format
-                params["reasoning_effort"] = thinking_budget
-                logger.info(f"Applied reasoning_effort={thinking_budget} to model {self.model}")
-            else:
-                logger.warning(
-                    f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
-                )
-
-        # Add other kwargs (excluding thinking_budget as it's handled above)
-        for k, v in kwargs.items():
-            if k not in ["max_tokens", "temperature", "thinking_budget"]:
-                params[k] = v
-
         logger.info(f"Sending request to OpenAI with model {self.model}")
 
         try:
packages/leann-core/src/leann/cli.py

@@ -41,23 +41,13 @@ def extract_pdf_text_with_pdfplumber(file_path: str) -> str:
 
 class LeannCLI:
     def __init__(self):
-        # Always use project-local .leann directory (like .git)
-        self.indexes_dir = Path.cwd() / ".leann" / "indexes"
+        self.indexes_dir = Path.home() / ".leann" / "indexes"
         self.indexes_dir.mkdir(parents=True, exist_ok=True)
 
-        # Default parser for documents
         self.node_parser = SentenceSplitter(
             chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
         )
 
-        # Code-optimized parser
-        self.code_parser = SentenceSplitter(
-            chunk_size=512,  # Larger chunks for code context
-            chunk_overlap=50,  # Less overlap to preserve function boundaries
-            separator="\n",  # Split by lines for code
-            paragraph_separator="\n\n",  # Preserve logical code blocks
-        )
-
     def get_index_path(self, index_name: str) -> str:
         index_dir = self.indexes_dir / index_name
         return str(index_dir / "documents.leann")
@@ -86,9 +76,7 @@ Examples:
         # Build command
         build_parser = subparsers.add_parser("build", help="Build document index")
         build_parser.add_argument("index_name", help="Index name")
-        build_parser.add_argument(
-            "--docs", type=str, default=".", help="Documents directory (default: current directory)"
-        )
+        build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
         build_parser.add_argument(
             "--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
         )
@@ -137,122 +125,43 @@ Examples:
             choices=["global", "local", "proportional"],
             default="global",
         )
-        ask_parser.add_argument(
-            "--thinking-budget",
-            type=str,
-            choices=["low", "medium", "high"],
-            default=None,
-            help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
-        )
 
         # List command
         subparsers.add_parser("list", help="List all indexes")
 
         return parser
 
-    def register_project_dir(self):
-        """Register current project directory in global registry"""
-        global_registry = Path.home() / ".leann" / "projects.json"
-        global_registry.parent.mkdir(exist_ok=True)
-
-        current_dir = str(Path.cwd())
-
-        # Load existing registry
-        projects = []
-        if global_registry.exists():
-            try:
-                import json
-
-                with open(global_registry) as f:
-                    projects = json.load(f)
-            except Exception:
-                projects = []
-
-        # Add current directory if not already present
-        if current_dir not in projects:
-            projects.append(current_dir)
-
-        # Save registry
-        import json
-
-        with open(global_registry, "w") as f:
-            json.dump(projects, f, indent=2)
-
     def list_indexes(self):
         print("Stored LEANN indexes:")
-
-        # Get all project directories with .leann
-        global_registry = Path.home() / ".leann" / "projects.json"
-        all_projects = []
-
-        if global_registry.exists():
-            try:
-                import json
-
-                with open(global_registry) as f:
-                    all_projects = json.load(f)
-            except Exception:
-                pass
-
-        # Filter to only existing directories with .leann
-        valid_projects = []
-        for project_dir in all_projects:
-            project_path = Path(project_dir)
-            if project_path.exists() and (project_path / ".leann" / "indexes").exists():
-                valid_projects.append(project_path)
-
-        # Add current project if it has .leann but not in registry
-        current_path = Path.cwd()
-        if (current_path / ".leann" / "indexes").exists() and current_path not in valid_projects:
-            valid_projects.append(current_path)
-
-        if not valid_projects:
+        if not self.indexes_dir.exists():
             print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
             return
 
-        total_indexes = 0
-        current_dir = Path.cwd()
-
-        for project_path in valid_projects:
-            indexes_dir = project_path / ".leann" / "indexes"
-            if not indexes_dir.exists():
-                continue
-
-            index_dirs = [d for d in indexes_dir.iterdir() if d.is_dir()]
-            if not index_dirs:
-                continue
-
-            # Show project header
-            if project_path == current_dir:
-                print(f"\n📁 Current project ({project_path}):")
-            else:
-                print(f"\n📂 {project_path}:")
-
-            for index_dir in index_dirs:
-                total_indexes += 1
-                index_name = index_dir.name
-                meta_file = index_dir / "documents.leann.meta.json"
-                status = "✓" if meta_file.exists() else "✗"
-
-                print(f"  {total_indexes}. {index_name} [{status}]")
-                if status == "✓":
-                    size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
-                        1024 * 1024
-                    )
-                    print(f"     Size: {size_mb:.1f} MB")
-
-        if total_indexes > 0:
-            print(f"\nTotal: {total_indexes} indexes across {len(valid_projects)} projects")
-            print("\nUsage (current project only):")
-
-            # Show example from current project
-            current_indexes_dir = current_dir / ".leann" / "indexes"
-            if current_indexes_dir.exists():
-                current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
-                if current_index_dirs:
-                    example_name = current_index_dirs[0].name
-                    print(f'  leann search {example_name} "your query"')
-                    print(f"  leann ask {example_name} --interactive")
+        index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
+        if not index_dirs:
+            print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
+            return
+
+        print(f"Found {len(index_dirs)} indexes:")
+        for i, index_dir in enumerate(index_dirs, 1):
+            index_name = index_dir.name
+            status = "✓" if self.index_exists(index_name) else "✗"
+            print(f"  {i}. {index_name} [{status}]")
+            if self.index_exists(index_name):
+                index_dir / "documents.leann.meta.json"
+                size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
+                    1024 * 1024
+                )
+                print(f"     Size: {size_mb:.1f} MB")
+
+        if index_dirs:
+            example_name = index_dirs[0].name
+            print("\nUsage:")
+            print(f'  leann search {example_name} "your query"')
+            print(f"  leann ask {example_name} --interactive")
 
     def load_documents(self, docs_dir: str):
         print(f"Loading documents from {docs_dir}...")
@@ -287,125 +196,17 @@ Examples:
             documents.extend(default_docs)
 
         # Load other file types with default reader
-        code_extensions = [
-            # Original document types
-            ".txt", ".md", ".docx",
-            # Code files for Claude Code integration
-            ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h", ".hpp",
-            ".cs", ".go", ".rs", ".rb", ".php", ".swift", ".kt", ".scala", ".r", ".sql",
-            ".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat",
-            # Config and markup files
-            ".json", ".yaml", ".yml", ".xml", ".toml", ".ini", ".cfg", ".conf",
-            ".html", ".css", ".scss", ".less", ".vue", ".svelte",
-            # Data science
-            ".ipynb", ".R", ".py", ".jl",
-        ]
         other_docs = SimpleDirectoryReader(
             docs_dir,
             recursive=True,
             encoding="utf-8",
-            required_exts=code_extensions,
+            required_exts=[".txt", ".md", ".docx"],
         ).load_data(show_progress=True)
         documents.extend(other_docs)
 
         all_texts = []
-
-        # Define code file extensions for intelligent chunking
-        code_file_exts = {
-            ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".cpp", ".c", ".h", ".hpp",
-            ".cs", ".go", ".rs", ".rb", ".php", ".swift", ".kt", ".scala", ".r", ".sql",
-            ".sh", ".bash", ".zsh", ".fish", ".ps1", ".bat",
-            ".json", ".yaml", ".yml", ".xml", ".toml", ".ini", ".cfg", ".conf",
-            ".html", ".css", ".scss", ".less", ".vue", ".svelte",
-            ".ipynb", ".R", ".jl",
-        }
-
         for doc in documents:
-            # Check if this is a code file based on source path
-            source_path = doc.metadata.get("source", "")
-            is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
-
-            # Use appropriate parser based on file type
-            parser = self.code_parser if is_code_file else self.node_parser
-            nodes = parser.get_nodes_from_documents([doc])
+            nodes = self.node_parser.get_nodes_from_documents([doc])
             for node in nodes:
                 all_texts.append(node.get_content())
@@ -418,8 +219,6 @@ Examples:
         index_dir = self.indexes_dir / index_name
         index_path = self.get_index_path(index_name)
 
-        print(f"📂 Indexing: {Path(docs_dir).resolve()}")
-
         if index_dir.exists() and not args.force:
             print(f"Index '{index_name}' already exists. Use --force to rebuild.")
             return
@@ -449,9 +248,6 @@ Examples:
         builder.build_index(index_path)
         print(f"Index built at {index_path}")
 
-        # Register this project directory in global registry
-        self.register_project_dir()
-
     async def search_documents(self, args):
         index_name = args.index_name
         query = args.query
@@ -512,11 +308,6 @@ Examples:
                 if not user_input:
                     continue
 
-                # Prepare LLM kwargs with thinking budget if specified
-                llm_kwargs = {}
-                if args.thinking_budget:
-                    llm_kwargs["thinking_budget"] = args.thinking_budget
-
                 response = chat.ask(
                     user_input,
                     top_k=args.top_k,
@@ -525,17 +316,11 @@ Examples:
                     prune_ratio=args.prune_ratio,
                     recompute_embeddings=args.recompute_embeddings,
                     pruning_strategy=args.pruning_strategy,
-                    llm_kwargs=llm_kwargs,
                 )
                 print(f"LEANN: {response}")
         else:
             query = input("Enter your question: ").strip()
             if query:
-                # Prepare LLM kwargs with thinking budget if specified
-                llm_kwargs = {}
-                if args.thinking_budget:
-                    llm_kwargs["thinking_budget"] = args.thinking_budget
-
                 response = chat.ask(
                     query,
                     top_k=args.top_k,
@@ -544,7 +329,6 @@ Examples:
                     prune_ratio=args.prune_ratio,
                     recompute_embeddings=args.recompute_embeddings,
                     pruning_strategy=args.pruning_strategy,
-                    llm_kwargs=llm_kwargs,
                 )
                 print(f"LEANN: {response}")
leann/mcp.py, the `leann_mcp` entry point (file deleted, @@ -1,134 +0,0 @@)

```python
#!/usr/bin/env python3

import json
import os
import subprocess
import sys


def handle_request(request):
    if request.get("method") == "initialize":
        return {
            "jsonrpc": "2.0",
            "id": request.get("id"),
            "result": {
                "capabilities": {"tools": {}},
                "protocolVersion": "2024-11-05",
                "serverInfo": {"name": "leann-mcp", "version": "1.0.0"},
            },
        }

    elif request.get("method") == "tools/list":
        return {
            "jsonrpc": "2.0",
            "id": request.get("id"),
            "result": {
                "tools": [
                    {
                        "name": "leann_search",
                        "description": "Search LEANN index",
                        "inputSchema": {
                            "type": "object",
                            "properties": {
                                "index_name": {"type": "string"},
                                "query": {"type": "string"},
                                "top_k": {"type": "integer", "default": 5},
                            },
                            "required": ["index_name", "query"],
                        },
                    },
                    {
                        "name": "leann_ask",
                        "description": "Ask question using LEANN RAG",
                        "inputSchema": {
                            "type": "object",
                            "properties": {
                                "index_name": {"type": "string"},
                                "question": {"type": "string"},
                            },
                            "required": ["index_name", "question"],
                        },
                    },
                    {
                        "name": "leann_list",
                        "description": "List all LEANN indexes",
                        "inputSchema": {"type": "object", "properties": {}},
                    },
                ]
            },
        }

    elif request.get("method") == "tools/call":
        tool_name = request["params"]["name"]
        args = request["params"].get("arguments", {})

        # Set working directory and environment
        env = os.environ.copy()
        cwd = "/Users/andyl/Projects/LEANN-RAG"

        try:
            if tool_name == "leann_search":
                cmd = [
                    "leann",
                    "search",
                    args["index_name"],
                    args["query"],
                    "--recompute-embeddings",
                    f"--top-k={args.get('top_k', 5)}",
                ]
                result = subprocess.run(cmd, capture_output=True, text=True, cwd=cwd, env=env)

            elif tool_name == "leann_ask":
                cmd = f'echo "{args["question"]}" | leann ask {args["index_name"]} --recompute-embeddings --llm ollama --model qwen3:8b'
                result = subprocess.run(
                    cmd, shell=True, capture_output=True, text=True, cwd=cwd, env=env
                )

            elif tool_name == "leann_list":
                result = subprocess.run(
                    ["leann", "list"], capture_output=True, text=True, cwd=cwd, env=env
                )

            return {
                "jsonrpc": "2.0",
                "id": request.get("id"),
                "result": {
                    "content": [
                        {
                            "type": "text",
                            "text": result.stdout
                            if result.returncode == 0
                            else f"Error: {result.stderr}",
                        }
                    ]
                },
            }

        except Exception as e:
            return {
                "jsonrpc": "2.0",
                "id": request.get("id"),
                "error": {"code": -1, "message": str(e)},
            }


def main():
    for line in sys.stdin:
        try:
            request = json.loads(line.strip())
            response = handle_request(request)
            if response:
                print(json.dumps(response))
                sys.stdout.flush()
        except Exception as e:
            error_response = {
                "jsonrpc": "2.0",
                "id": None,
                "error": {"code": -1, "message": str(e)},
            }
            print(json.dumps(error_response))
            sys.stdout.flush()


if __name__ == "__main__":
    main()
```
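A quick way to exercise this server by hand is to drive it over stdio the same way Claude Code would; a hypothetical smoke test (assumes the code above is saved as `mcp.py`):

```python
# Hypothetical smoke test: send one JSON-RPC line to the server's stdin
# and read the single-line JSON response it prints.
import json
import subprocess

proc = subprocess.run(
    ["python", "mcp.py"],
    input=json.dumps({"jsonrpc": "2.0", "id": 1, "method": "tools/list"}) + "\n",
    capture_output=True,
    text=True,
)
tools = json.loads(proc.stdout)["result"]["tools"]
print([t["name"] for t in tools])  # ['leann_search', 'leann_ask', 'leann_list']
```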
packages/leann-mcp/README.md (file deleted, @@ -1,69 +0,0 @@)

# LEANN Claude Code Integration

Intelligent code assistance using LEANN's vector search directly in Claude Code.

## Prerequisites

First, install the LEANN CLI globally:

```bash
uv tool install leann
```

This makes the `leann` command available system-wide, which `leann_mcp` requires.

## Quick Setup

Add the LEANN MCP server to Claude Code:

```bash
claude mcp add leann-server -- leann_mcp
```

## Available Tools

- **`leann_list`** - List available indexes across all projects
- **`leann_search`** - Search code and documents with semantic queries
- **`leann_ask`** - Ask questions and get AI-powered answers from your codebase

## Quick Start

```bash
# Build an index for your project
leann build my-project

# Start Claude Code
claude
```

Then in Claude Code:
```
Help me understand this codebase. List available indexes and search for authentication patterns.
```

<p align="center">
  <img src="../../assets/claude_code_leann.png" alt="LEANN in Claude Code" width="80%">
</p>

## How It Works

- **`leann`** - Core CLI tool for indexing and searching (installed globally)
- **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration
- Claude Code calls `leann_mcp`, which executes `leann` commands and returns results

## File Support

Python, JavaScript, TypeScript, Java, Go, Rust, SQL, YAML, JSON, and 30+ more file types.

## Storage

- Project indexes in the `.leann/` directory (like `.git`)
- Global project registry at `~/.leann/projects.json`
- Multi-project support built in

## Removing

```bash
claude mcp remove leann-server
```
uv.lock (generated, 10 changes)

@@ -2155,7 +2155,7 @@ wheels = [
 
 [[package]]
 name = "leann-backend-diskann"
-version = "0.2.1"
+version = "0.2.0"
 source = { editable = "packages/leann-backend-diskann" }
 dependencies = [
     { name = "leann-core" },

@@ -2167,14 +2167,14 @@ dependencies = [
 
 [package.metadata]
 requires-dist = [
-    { name = "leann-core", specifier = "==0.2.1" },
+    { name = "leann-core", specifier = "==0.2.0" },
     { name = "numpy" },
     { name = "protobuf", specifier = ">=3.19.0" },
 ]
 
 [[package]]
 name = "leann-backend-hnsw"
-version = "0.2.1"
+version = "0.2.0"
 source = { editable = "packages/leann-backend-hnsw" }
 dependencies = [
     { name = "leann-core" },

@@ -2187,7 +2187,7 @@ dependencies = [
 
 [package.metadata]
 requires-dist = [
-    { name = "leann-core", specifier = "==0.2.1" },
+    { name = "leann-core", specifier = "==0.2.0" },
     { name = "msgpack", specifier = ">=1.0.0" },
     { name = "numpy" },
     { name = "pyzmq", specifier = ">=23.0.0" },

@@ -2195,7 +2195,7 @@ requires-dist = [
 
 [[package]]
 name = "leann-core"
-version = "0.2.1"
+version = "0.2.0"
 source = { editable = "packages/leann-core" }
 dependencies = [
     { name = "accelerate" },