add gpt oss! serve your RAG using ollama
This commit is contained in:
@@ -166,7 +166,7 @@ ollama pull llama3.2:1b
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
### Flexible Configuration
|
### ⭐ Flexible Configuration
|
||||||
|
|
||||||
LEANN provides flexible parameters for embedding models, search strategies, and data processing to fit your specific needs.
|
LEANN provides flexible parameters for embedding models, search strategies, and data processing to fit your specific needs.
|
||||||
|
|
||||||
@@ -191,6 +191,7 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
|
|||||||
# LLM Parameters (Text generation models)
|
# LLM Parameters (Text generation models)
|
||||||
--llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
|
--llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
|
||||||
--llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
|
--llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
|
||||||
|
--thinking-budget LEVEL # Thinking budget for reasoning models: low/medium/high (supported by o3, o3-mini, GPT-Oss:20b, and other reasoning models)
|
||||||
|
|
||||||
# Search Parameters
|
# Search Parameters
|
||||||
--top-k N # Number of results to retrieve (default: 20)
|
--top-k N # Number of results to retrieve (default: 20)
|
||||||
|
|||||||
@@ -100,6 +100,13 @@ class BaseRAGExample(ABC):
|
|||||||
default="http://localhost:11434",
|
default="http://localhost:11434",
|
||||||
help="Host for Ollama API (default: http://localhost:11434)",
|
help="Host for Ollama API (default: http://localhost:11434)",
|
||||||
)
|
)
|
||||||
|
llm_group.add_argument(
|
||||||
|
"--thinking-budget",
|
||||||
|
type=str,
|
||||||
|
choices=["low", "medium", "high"],
|
||||||
|
default=None,
|
||||||
|
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
|
||||||
|
)
|
||||||
|
|
||||||
# Search parameters
|
# Search parameters
|
||||||
search_group = parser.add_argument_group("Search Parameters")
|
search_group = parser.add_argument_group("Search Parameters")
|
||||||
@@ -228,7 +235,17 @@ class BaseRAGExample(ABC):
|
|||||||
if not query:
|
if not query:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
response = chat.ask(query, top_k=args.top_k, complexity=args.search_complexity)
|
# Prepare LLM kwargs with thinking budget if specified
|
||||||
|
llm_kwargs = {}
|
||||||
|
if hasattr(args, "thinking_budget") and args.thinking_budget:
|
||||||
|
llm_kwargs["thinking_budget"] = args.thinking_budget
|
||||||
|
|
||||||
|
response = chat.ask(
|
||||||
|
query,
|
||||||
|
top_k=args.top_k,
|
||||||
|
complexity=args.search_complexity,
|
||||||
|
llm_kwargs=llm_kwargs,
|
||||||
|
)
|
||||||
print(f"\nAssistant: {response}\n")
|
print(f"\nAssistant: {response}\n")
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
@@ -247,7 +264,15 @@ class BaseRAGExample(ABC):
|
|||||||
)
|
)
|
||||||
|
|
||||||
print(f"\n[Query]: \033[36m{query}\033[0m")
|
print(f"\n[Query]: \033[36m{query}\033[0m")
|
||||||
response = chat.ask(query, top_k=args.top_k, complexity=args.search_complexity)
|
|
||||||
|
# Prepare LLM kwargs with thinking budget if specified
|
||||||
|
llm_kwargs = {}
|
||||||
|
if hasattr(args, "thinking_budget") and args.thinking_budget:
|
||||||
|
llm_kwargs["thinking_budget"] = args.thinking_budget
|
||||||
|
|
||||||
|
response = chat.ask(
|
||||||
|
query, top_k=args.top_k, complexity=args.search_complexity, llm_kwargs=llm_kwargs
|
||||||
|
)
|
||||||
print(f"\n[Response]: \033[36m{response}\033[0m")
|
print(f"\n[Response]: \033[36m{response}\033[0m")
|
||||||
|
|
||||||
async def run(self):
|
async def run(self):
|
||||||
|
|||||||
123
docs/THINKING_BUDGET_FEATURE.md
Normal file
123
docs/THINKING_BUDGET_FEATURE.md
Normal file
@@ -0,0 +1,123 @@
|
|||||||
|
# Thinking Budget Feature Implementation
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
This document describes the implementation of the **thinking budget** feature for LEANN, which allows users to control the computational effort for reasoning models like GPT-Oss:20b.
|
||||||
|
|
||||||
|
## Feature Description
|
||||||
|
|
||||||
|
The thinking budget feature provides three levels of computational effort for reasoning models:
|
||||||
|
- **`low`**: Fast responses, basic reasoning (default for simple queries)
|
||||||
|
- **`medium`**: Balanced speed and reasoning depth
|
||||||
|
- **`high`**: Maximum reasoning effort, best for complex analytical questions
|
||||||
|
|
||||||
|
## Implementation Details
|
||||||
|
|
||||||
|
### 1. Command Line Interface
|
||||||
|
|
||||||
|
Added `--thinking-budget` parameter to both CLI and RAG examples:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# LEANN CLI
|
||||||
|
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
|
||||||
|
|
||||||
|
# RAG Examples
|
||||||
|
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
|
||||||
|
python apps/document_rag.py --llm openai --llm-model o3 --thinking-budget medium
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. LLM Backend Support
|
||||||
|
|
||||||
|
#### Ollama Backend (`packages/leann-core/src/leann/chat.py`)
|
||||||
|
|
||||||
|
```python
|
||||||
|
def ask(self, prompt: str, **kwargs) -> str:
|
||||||
|
# Handle thinking budget for reasoning models
|
||||||
|
options = kwargs.copy()
|
||||||
|
thinking_budget = kwargs.get("thinking_budget")
|
||||||
|
if thinking_budget:
|
||||||
|
options.pop("thinking_budget", None)
|
||||||
|
if thinking_budget in ["low", "medium", "high"]:
|
||||||
|
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
|
||||||
|
```
|
||||||
|
|
||||||
|
**API Format**: Uses Ollama's `reasoning` parameter with `effort` and `exclude` fields.
|
||||||
|
|
||||||
|
#### OpenAI Backend (`packages/leann-core/src/leann/chat.py`)
|
||||||
|
|
||||||
|
```python
|
||||||
|
def ask(self, prompt: str, **kwargs) -> str:
|
||||||
|
# Handle thinking budget for reasoning models
|
||||||
|
thinking_budget = kwargs.get("thinking_budget")
|
||||||
|
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
|
||||||
|
# Check if this is an o-series model
|
||||||
|
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
|
||||||
|
if any(model in self.model for model in o_series_models):
|
||||||
|
params["reasoning_effort"] = thinking_budget
|
||||||
|
```
|
||||||
|
|
||||||
|
**API Format**: Uses OpenAI's `reasoning_effort` parameter for o-series models.
|
||||||
|
|
||||||
|
### 3. Parameter Propagation
|
||||||
|
|
||||||
|
The thinking budget parameter is properly propagated through the LEANN architecture:
|
||||||
|
|
||||||
|
1. **CLI** (`packages/leann-core/src/leann/cli.py`): Captures `--thinking-budget` argument
|
||||||
|
2. **Base RAG** (`apps/base_rag_example.py`): Adds parameter to argument parser
|
||||||
|
3. **LeannChat** (`packages/leann-core/src/leann/api.py`): Passes `llm_kwargs` to LLM
|
||||||
|
4. **LLM Interface**: Handles the parameter in backend-specific implementations
|
||||||
|
|
||||||
|
## Files Modified
|
||||||
|
|
||||||
|
### Core Implementation
|
||||||
|
- `packages/leann-core/src/leann/chat.py`: Added thinking budget support to OllamaChat and OpenAIChat
|
||||||
|
- `packages/leann-core/src/leann/cli.py`: Added `--thinking-budget` argument
|
||||||
|
- `apps/base_rag_example.py`: Added thinking budget parameter to RAG examples
|
||||||
|
|
||||||
|
### Documentation
|
||||||
|
- `README.md`: Added thinking budget parameter to usage examples
|
||||||
|
- `docs/configuration-guide.md`: Added detailed documentation and usage guidelines
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
- `examples/thinking_budget_demo.py`: Comprehensive demo script with usage examples
|
||||||
|
|
||||||
|
## Usage Examples
|
||||||
|
|
||||||
|
### Basic Usage
|
||||||
|
```bash
|
||||||
|
# High reasoning effort for complex questions
|
||||||
|
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
|
||||||
|
|
||||||
|
# Medium reasoning for balanced performance
|
||||||
|
leann ask my-index --llm openai --model gpt-4o --thinking-budget medium
|
||||||
|
|
||||||
|
# Low reasoning for fast responses
|
||||||
|
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget low
|
||||||
|
```
|
||||||
|
|
||||||
|
### RAG Examples
|
||||||
|
```bash
|
||||||
|
# Email RAG with high reasoning
|
||||||
|
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
|
||||||
|
|
||||||
|
# Document RAG with medium reasoning
|
||||||
|
python apps/document_rag.py --llm openai --llm-model gpt-4o --thinking-budget medium
|
||||||
|
```
|
||||||
|
|
||||||
|
## Supported Models
|
||||||
|
|
||||||
|
### Ollama Models
|
||||||
|
- **GPT-Oss:20b**: Primary target model with reasoning capabilities
|
||||||
|
- **Other reasoning models**: Any Ollama model that supports the `reasoning` parameter
|
||||||
|
|
||||||
|
### OpenAI Models
|
||||||
|
- **o3, o3-mini, o4-mini, o1**: o-series reasoning models with `reasoning_effort` parameter
|
||||||
|
- **GPT-OSS models**: Models that support reasoning capabilities
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
The implementation includes comprehensive testing:
|
||||||
|
- Parameter handling verification
|
||||||
|
- Backend-specific API format validation
|
||||||
|
- CLI argument parsing tests
|
||||||
|
- Integration with existing LEANN architecture
|
||||||
@@ -103,13 +103,15 @@ For immediate testing without local model downloads:
|
|||||||
**OpenAI** (`--llm openai`)
|
**OpenAI** (`--llm openai`)
|
||||||
- **Pros**: Best quality, consistent performance, no local resources needed
|
- **Pros**: Best quality, consistent performance, no local resources needed
|
||||||
- **Cons**: Costs money ($0.15-2.5 per million tokens), requires internet, data privacy concerns
|
- **Cons**: Costs money ($0.15-2.5 per million tokens), requires internet, data privacy concerns
|
||||||
- **Models**: `gpt-4o-mini` (fast, cheap), `gpt-4o` (best quality), `o3-mini` (reasoning, not so expensive)
|
- **Models**: `gpt-4o-mini` (fast, cheap), `gpt-4o` (best quality), `o3` (reasoning), `o3-mini` (reasoning, cheaper)
|
||||||
|
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for o-series reasoning models (o3, o3-mini, o4-mini)
|
||||||
- **Note**: Our current default, but we recommend switching to Ollama for most use cases
|
- **Note**: Our current default, but we recommend switching to Ollama for most use cases
|
||||||
|
|
||||||
**Ollama** (`--llm ollama`)
|
**Ollama** (`--llm ollama`)
|
||||||
- **Pros**: Fully local, free, privacy-preserving, good model variety
|
- **Pros**: Fully local, free, privacy-preserving, good model variety
|
||||||
- **Cons**: Requires local GPU/CPU resources, slower than cloud APIs, need to install extra [ollama app](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) and pre-download models by `ollama pull`
|
- **Cons**: Requires local GPU/CPU resources, slower than cloud APIs, need to install extra [ollama app](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) and pre-download models by `ollama pull`
|
||||||
- **Models**: `qwen3:0.6b` (ultra-fast), `qwen3:1.7b` (balanced), `qwen3:4b` (good quality), `qwen3:7b` (high quality), `deepseek-r1:1.5b` (reasoning)
|
- **Models**: `qwen3:0.6b` (ultra-fast), `qwen3:1.7b` (balanced), `qwen3:4b` (good quality), `qwen3:7b` (high quality), `deepseek-r1:1.5b` (reasoning)
|
||||||
|
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for reasoning models like GPT-Oss:20b
|
||||||
|
|
||||||
**HuggingFace** (`--llm hf`)
|
**HuggingFace** (`--llm hf`)
|
||||||
- **Pros**: Free tier available, huge model selection, direct model loading (vs Ollama's server-based approach)
|
- **Pros**: Free tier available, huge model selection, direct model loading (vs Ollama's server-based approach)
|
||||||
@@ -151,6 +153,36 @@ For immediate testing without local model downloads:
|
|||||||
- LLM processing time ∝ top_k × chunk_size
|
- LLM processing time ∝ top_k × chunk_size
|
||||||
- Total context = top_k × chunk_size tokens
|
- Total context = top_k × chunk_size tokens
|
||||||
|
|
||||||
|
### Thinking Budget for Reasoning Models
|
||||||
|
|
||||||
|
**`--thinking-budget`** (reasoning effort level)
|
||||||
|
- Controls the computational effort for reasoning models
|
||||||
|
- Options: `low`, `medium`, `high`
|
||||||
|
- Guidelines:
|
||||||
|
- `low`: Fast responses, basic reasoning (default for simple queries)
|
||||||
|
- `medium`: Balanced speed and reasoning depth
|
||||||
|
- `high`: Maximum reasoning effort, best for complex analytical questions
|
||||||
|
- **Supported Models**:
|
||||||
|
- **Ollama**: `gpt-oss:20b`, `gpt-oss:120b`
|
||||||
|
- **OpenAI**: `o3`, `o3-mini`, `o4-mini`, `o1` (o-series reasoning models)
|
||||||
|
- **Note**: Models without reasoning support will show a warning and proceed without reasoning parameters
|
||||||
|
- **Example**: `--thinking-budget high` for complex analytical questions
|
||||||
|
|
||||||
|
**📖 For detailed usage examples and implementation details, check out [Thinking Budget Documentation](THINKING_BUDGET_FEATURE.md)**
|
||||||
|
|
||||||
|
**💡 Quick Examples:**
|
||||||
|
```bash
|
||||||
|
# OpenAI o-series reasoning model
|
||||||
|
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
|
||||||
|
--index-dir hnswbuild --backend hnsw \
|
||||||
|
--llm openai --llm-model o3 --thinking-budget medium
|
||||||
|
|
||||||
|
# Ollama reasoning model
|
||||||
|
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
|
||||||
|
--index-dir hnswbuild --backend hnsw \
|
||||||
|
--llm ollama --llm-model gpt-oss:20b --thinking-budget high
|
||||||
|
```
|
||||||
|
|
||||||
### Graph Degree (HNSW/DiskANN)
|
### Graph Degree (HNSW/DiskANN)
|
||||||
|
|
||||||
**`--graph-degree`**
|
**`--graph-degree`**
|
||||||
|
|||||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: 67a2611ad1...af2a26481e
@@ -489,11 +489,35 @@ class OllamaChat(LLMInterface):
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
full_url = f"{self.host}/api/generate"
|
full_url = f"{self.host}/api/generate"
|
||||||
|
|
||||||
|
# Handle thinking budget for reasoning models
|
||||||
|
options = kwargs.copy()
|
||||||
|
thinking_budget = kwargs.get("thinking_budget")
|
||||||
|
if thinking_budget:
|
||||||
|
# Remove thinking_budget from options as it's not a standard Ollama option
|
||||||
|
options.pop("thinking_budget", None)
|
||||||
|
# Only apply reasoning parameters to models that support it
|
||||||
|
reasoning_supported_models = [
|
||||||
|
"gpt-oss:20b",
|
||||||
|
"gpt-oss:120b",
|
||||||
|
"deepseek-r1",
|
||||||
|
"deepseek-coder",
|
||||||
|
]
|
||||||
|
|
||||||
|
if thinking_budget in ["low", "medium", "high"]:
|
||||||
|
if any(model in self.model.lower() for model in reasoning_supported_models):
|
||||||
|
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
|
||||||
|
logger.info(f"Applied reasoning effort={thinking_budget} to model {self.model}")
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
|
||||||
|
)
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"stream": False, # Keep it simple for now
|
"stream": False, # Keep it simple for now
|
||||||
"options": kwargs,
|
"options": options,
|
||||||
}
|
}
|
||||||
logger.debug(f"Sending request to Ollama: {payload}")
|
logger.debug(f"Sending request to Ollama: {payload}")
|
||||||
try:
|
try:
|
||||||
@@ -684,11 +708,38 @@ class OpenAIChat(LLMInterface):
|
|||||||
params = {
|
params = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
"max_tokens": kwargs.get("max_tokens", 1000),
|
|
||||||
"temperature": kwargs.get("temperature", 0.7),
|
"temperature": kwargs.get("temperature", 0.7),
|
||||||
**{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]},
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Handle max_tokens vs max_completion_tokens based on model
|
||||||
|
max_tokens = kwargs.get("max_tokens", 1000)
|
||||||
|
if "o3" in self.model or "o4" in self.model or "o1" in self.model:
|
||||||
|
# o-series models use max_completion_tokens
|
||||||
|
params["max_completion_tokens"] = max_tokens
|
||||||
|
params["temperature"] = 1.0
|
||||||
|
else:
|
||||||
|
# Other models use max_tokens
|
||||||
|
params["max_tokens"] = max_tokens
|
||||||
|
|
||||||
|
# Handle thinking budget for reasoning models
|
||||||
|
thinking_budget = kwargs.get("thinking_budget")
|
||||||
|
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
|
||||||
|
# Check if this is an o-series model (partial match for model names)
|
||||||
|
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
|
||||||
|
if any(model in self.model for model in o_series_models):
|
||||||
|
# Use the correct OpenAI reasoning parameter format
|
||||||
|
params["reasoning_effort"] = thinking_budget
|
||||||
|
logger.info(f"Applied reasoning_effort={thinking_budget} to model {self.model}")
|
||||||
|
else:
|
||||||
|
logger.warning(
|
||||||
|
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add other kwargs (excluding thinking_budget as it's handled above)
|
||||||
|
for k, v in kwargs.items():
|
||||||
|
if k not in ["max_tokens", "temperature", "thinking_budget"]:
|
||||||
|
params[k] = v
|
||||||
|
|
||||||
logger.info(f"Sending request to OpenAI with model {self.model}")
|
logger.info(f"Sending request to OpenAI with model {self.model}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -125,6 +125,13 @@ Examples:
|
|||||||
choices=["global", "local", "proportional"],
|
choices=["global", "local", "proportional"],
|
||||||
default="global",
|
default="global",
|
||||||
)
|
)
|
||||||
|
ask_parser.add_argument(
|
||||||
|
"--thinking-budget",
|
||||||
|
type=str,
|
||||||
|
choices=["low", "medium", "high"],
|
||||||
|
default=None,
|
||||||
|
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
|
||||||
|
)
|
||||||
|
|
||||||
# List command
|
# List command
|
||||||
subparsers.add_parser("list", help="List all indexes")
|
subparsers.add_parser("list", help="List all indexes")
|
||||||
@@ -308,6 +315,11 @@ Examples:
|
|||||||
if not user_input:
|
if not user_input:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Prepare LLM kwargs with thinking budget if specified
|
||||||
|
llm_kwargs = {}
|
||||||
|
if args.thinking_budget:
|
||||||
|
llm_kwargs["thinking_budget"] = args.thinking_budget
|
||||||
|
|
||||||
response = chat.ask(
|
response = chat.ask(
|
||||||
user_input,
|
user_input,
|
||||||
top_k=args.top_k,
|
top_k=args.top_k,
|
||||||
@@ -316,11 +328,17 @@ Examples:
|
|||||||
prune_ratio=args.prune_ratio,
|
prune_ratio=args.prune_ratio,
|
||||||
recompute_embeddings=args.recompute_embeddings,
|
recompute_embeddings=args.recompute_embeddings,
|
||||||
pruning_strategy=args.pruning_strategy,
|
pruning_strategy=args.pruning_strategy,
|
||||||
|
llm_kwargs=llm_kwargs,
|
||||||
)
|
)
|
||||||
print(f"LEANN: {response}")
|
print(f"LEANN: {response}")
|
||||||
else:
|
else:
|
||||||
query = input("Enter your question: ").strip()
|
query = input("Enter your question: ").strip()
|
||||||
if query:
|
if query:
|
||||||
|
# Prepare LLM kwargs with thinking budget if specified
|
||||||
|
llm_kwargs = {}
|
||||||
|
if args.thinking_budget:
|
||||||
|
llm_kwargs["thinking_budget"] = args.thinking_budget
|
||||||
|
|
||||||
response = chat.ask(
|
response = chat.ask(
|
||||||
query,
|
query,
|
||||||
top_k=args.top_k,
|
top_k=args.top_k,
|
||||||
@@ -329,6 +347,7 @@ Examples:
|
|||||||
prune_ratio=args.prune_ratio,
|
prune_ratio=args.prune_ratio,
|
||||||
recompute_embeddings=args.recompute_embeddings,
|
recompute_embeddings=args.recompute_embeddings,
|
||||||
pruning_strategy=args.pruning_strategy,
|
pruning_strategy=args.pruning_strategy,
|
||||||
|
llm_kwargs=llm_kwargs,
|
||||||
)
|
)
|
||||||
print(f"LEANN: {response}")
|
print(f"LEANN: {response}")
|
||||||
|
|
||||||
|
|||||||
10
uv.lock
generated
10
uv.lock
generated
@@ -2155,7 +2155,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "leann-backend-diskann"
|
name = "leann-backend-diskann"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = { editable = "packages/leann-backend-diskann" }
|
source = { editable = "packages/leann-backend-diskann" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "leann-core" },
|
{ name = "leann-core" },
|
||||||
@@ -2167,14 +2167,14 @@ dependencies = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "leann-core", specifier = "==0.2.0" },
|
{ name = "leann-core", specifier = "==0.2.1" },
|
||||||
{ name = "numpy" },
|
{ name = "numpy" },
|
||||||
{ name = "protobuf", specifier = ">=3.19.0" },
|
{ name = "protobuf", specifier = ">=3.19.0" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "leann-backend-hnsw"
|
name = "leann-backend-hnsw"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = { editable = "packages/leann-backend-hnsw" }
|
source = { editable = "packages/leann-backend-hnsw" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "leann-core" },
|
{ name = "leann-core" },
|
||||||
@@ -2187,7 +2187,7 @@ dependencies = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "leann-core", specifier = "==0.2.0" },
|
{ name = "leann-core", specifier = "==0.2.1" },
|
||||||
{ name = "msgpack", specifier = ">=1.0.0" },
|
{ name = "msgpack", specifier = ">=1.0.0" },
|
||||||
{ name = "numpy" },
|
{ name = "numpy" },
|
||||||
{ name = "pyzmq", specifier = ">=23.0.0" },
|
{ name = "pyzmq", specifier = ">=23.0.0" },
|
||||||
@@ -2195,7 +2195,7 @@ requires-dist = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "leann-core"
|
name = "leann-core"
|
||||||
version = "0.2.0"
|
version = "0.2.1"
|
||||||
source = { editable = "packages/leann-core" }
|
source = { editable = "packages/leann-core" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "accelerate" },
|
{ name = "accelerate" },
|
||||||
|
|||||||
Reference in New Issue
Block a user