Compare commits
38 Commits
feat/multi
...
refactor-a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0877960547 | ||
|
|
d68af63d05 | ||
|
|
b844aca968 | ||
|
|
85277ba67a | ||
|
|
e9562acdc2 | ||
|
|
7fd3db1ddb | ||
|
|
c1ccc51a75 | ||
|
|
b0239b6e4d | ||
|
|
58556ef44c | ||
|
|
87c930d705 | ||
|
|
86f919a6da | ||
|
|
f8d34663b4 | ||
|
|
568cf597f4 | ||
|
|
baf70dc411 | ||
|
|
7ad2ec39d6 | ||
|
|
31fd3c816a | ||
|
|
1f6c7f2f5a | ||
|
|
c1124eb349 | ||
|
|
274bbb19ea | ||
|
|
8c152c7a31 | ||
|
|
ce77eef13a | ||
|
|
9d77175ac8 | ||
|
|
7fbb6c98ef | ||
|
|
914a248c28 | ||
|
|
55fc5862f9 | ||
|
|
fd97b8dfa8 | ||
|
|
57959947a1 | ||
|
|
cc0c091ca5 | ||
|
|
ff389c7d8d | ||
|
|
6780a8eaba | ||
|
|
984056f126 | ||
|
|
bd4451bf50 | ||
|
|
34e313f64a | ||
|
|
ddc789b231 | ||
|
|
ff1b622bdd | ||
|
|
3cde4fc7b3 | ||
|
|
4e3bcda5fa | ||
|
|
46f6f76fc3 |
85
.github/workflows/build-reusable.yml
vendored
85
.github/workflows/build-reusable.yml
vendored
@@ -54,26 +54,16 @@ jobs:
|
|||||||
python: '3.12'
|
python: '3.12'
|
||||||
- os: ubuntu-22.04
|
- os: ubuntu-22.04
|
||||||
python: '3.13'
|
python: '3.13'
|
||||||
- os: macos-14
|
- os: macos-latest
|
||||||
python: '3.9'
|
python: '3.9'
|
||||||
- os: macos-14
|
- os: macos-latest
|
||||||
python: '3.10'
|
python: '3.10'
|
||||||
- os: macos-14
|
- os: macos-latest
|
||||||
python: '3.11'
|
python: '3.11'
|
||||||
- os: macos-14
|
- os: macos-latest
|
||||||
python: '3.12'
|
python: '3.12'
|
||||||
- os: macos-14
|
- os: macos-latest
|
||||||
python: '3.13'
|
python: '3.13'
|
||||||
- os: macos-13
|
|
||||||
python: '3.9'
|
|
||||||
- os: macos-13
|
|
||||||
python: '3.10'
|
|
||||||
- os: macos-13
|
|
||||||
python: '3.11'
|
|
||||||
- os: macos-13
|
|
||||||
python: '3.12'
|
|
||||||
# Note: macos-13 + Python 3.13 excluded due to PyTorch compatibility
|
|
||||||
# (PyTorch 2.5+ supports Python 3.13 but not Intel Mac x86_64)
|
|
||||||
runs-on: ${{ matrix.os }}
|
runs-on: ${{ matrix.os }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
@@ -119,59 +109,48 @@ jobs:
|
|||||||
uv pip install --system delocate
|
uv pip install --system delocate
|
||||||
fi
|
fi
|
||||||
|
|
||||||
- name: Set macOS environment variables
|
|
||||||
if: runner.os == 'macOS'
|
|
||||||
run: |
|
|
||||||
# Use brew --prefix to automatically detect Homebrew installation path
|
|
||||||
HOMEBREW_PREFIX=$(brew --prefix)
|
|
||||||
echo "HOMEBREW_PREFIX=${HOMEBREW_PREFIX}" >> $GITHUB_ENV
|
|
||||||
echo "OpenMP_ROOT=${HOMEBREW_PREFIX}/opt/libomp" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
# Set CMAKE_PREFIX_PATH to let CMake find all packages automatically
|
|
||||||
echo "CMAKE_PREFIX_PATH=${HOMEBREW_PREFIX}" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
# Set compiler flags for OpenMP (required for both backends)
|
|
||||||
echo "LDFLAGS=-L${HOMEBREW_PREFIX}/opt/libomp/lib" >> $GITHUB_ENV
|
|
||||||
echo "CPPFLAGS=-I${HOMEBREW_PREFIX}/opt/libomp/include" >> $GITHUB_ENV
|
|
||||||
|
|
||||||
- name: Build packages
|
- name: Build packages
|
||||||
run: |
|
run: |
|
||||||
# Build core (platform independent)
|
# Build core (platform independent)
|
||||||
cd packages/leann-core
|
if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
|
||||||
uv build
|
cd packages/leann-core
|
||||||
cd ../..
|
uv build
|
||||||
|
cd ../..
|
||||||
|
fi
|
||||||
|
|
||||||
# Build HNSW backend
|
# Build HNSW backend
|
||||||
cd packages/leann-backend-hnsw
|
cd packages/leann-backend-hnsw
|
||||||
if [[ "${{ matrix.os }}" == macos-* ]]; then
|
if [ "${{ matrix.os }}" == "macos-latest" ]; then
|
||||||
# Use system clang for better compatibility
|
# Use system clang instead of homebrew LLVM for better compatibility
|
||||||
export CC=clang
|
export CC=clang
|
||||||
export CXX=clang++
|
export CXX=clang++
|
||||||
export MACOSX_DEPLOYMENT_TARGET=11.0
|
export MACOSX_DEPLOYMENT_TARGET=11.0
|
||||||
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
|
uv build --wheel --python python
|
||||||
else
|
else
|
||||||
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
|
uv build --wheel --python python
|
||||||
fi
|
fi
|
||||||
cd ../..
|
cd ../..
|
||||||
|
|
||||||
# Build DiskANN backend
|
# Build DiskANN backend
|
||||||
cd packages/leann-backend-diskann
|
cd packages/leann-backend-diskann
|
||||||
if [[ "${{ matrix.os }}" == macos-* ]]; then
|
if [ "${{ matrix.os }}" == "macos-latest" ]; then
|
||||||
# Use system clang for better compatibility
|
# Use system clang instead of homebrew LLVM for better compatibility
|
||||||
export CC=clang
|
export CC=clang
|
||||||
export CXX=clang++
|
export CXX=clang++
|
||||||
# DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
|
# DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function
|
||||||
export MACOSX_DEPLOYMENT_TARGET=13.3
|
export MACOSX_DEPLOYMENT_TARGET=13.3
|
||||||
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
|
uv build --wheel --python python
|
||||||
else
|
else
|
||||||
uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist
|
uv build --wheel --python python
|
||||||
fi
|
fi
|
||||||
cd ../..
|
cd ../..
|
||||||
|
|
||||||
# Build meta package (platform independent)
|
# Build meta package (platform independent)
|
||||||
cd packages/leann
|
if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
|
||||||
uv build
|
cd packages/leann
|
||||||
cd ../..
|
uv build
|
||||||
|
cd ../..
|
||||||
|
fi
|
||||||
|
|
||||||
- name: Repair wheels (Linux)
|
- name: Repair wheels (Linux)
|
||||||
if: runner.os == 'Linux'
|
if: runner.os == 'Linux'
|
||||||
@@ -220,18 +199,20 @@ jobs:
|
|||||||
echo "📦 Built packages:"
|
echo "📦 Built packages:"
|
||||||
find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort
|
find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort
|
||||||
|
|
||||||
|
|
||||||
- name: Install built packages for testing
|
- name: Install built packages for testing
|
||||||
run: |
|
run: |
|
||||||
# Create a virtual environment with the correct Python version
|
# Create a virtual environment
|
||||||
uv venv --python ${{ matrix.python }}
|
uv venv
|
||||||
source .venv/bin/activate || source .venv/Scripts/activate
|
source .venv/bin/activate || source .venv/Scripts/activate
|
||||||
|
|
||||||
# Install packages using --find-links to prioritize local builds
|
# Install the built wheels
|
||||||
uv pip install --find-links packages/leann-core/dist --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist packages/leann-core/dist/*.whl || uv pip install --find-links packages/leann-core/dist packages/leann-core/dist/*.tar.gz
|
# Use --find-links to let uv choose the correct wheel for the platform
|
||||||
uv pip install --find-links packages/leann-core/dist packages/leann-backend-hnsw/dist/*.whl
|
if [[ "${{ matrix.os }}" == ubuntu-* ]]; then
|
||||||
uv pip install --find-links packages/leann-core/dist packages/leann-backend-diskann/dist/*.whl
|
uv pip install leann-core --find-links packages/leann-core/dist
|
||||||
uv pip install packages/leann/dist/*.whl || uv pip install packages/leann/dist/*.tar.gz
|
uv pip install leann --find-links packages/leann/dist
|
||||||
|
fi
|
||||||
|
uv pip install leann-backend-hnsw --find-links packages/leann-backend-hnsw/dist
|
||||||
|
uv pip install leann-backend-diskann --find-links packages/leann-backend-diskann/dist
|
||||||
|
|
||||||
# Install test dependencies using extras
|
# Install test dependencies using extras
|
||||||
uv pip install -e ".[test]"
|
uv pip install -e ".[test]"
|
||||||
|
|||||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -38,7 +38,7 @@ data/*
|
|||||||
!data/2501.14312v1 (1).pdf
|
!data/2501.14312v1 (1).pdf
|
||||||
!data/2506.08276v1.pdf
|
!data/2506.08276v1.pdf
|
||||||
!data/PrideandPrejudice.txt
|
!data/PrideandPrejudice.txt
|
||||||
!data/huawei_pangu.md
|
!data/README.md
|
||||||
!data/ground_truth/
|
!data/ground_truth/
|
||||||
!data/indices/
|
!data/indices/
|
||||||
!data/queries/
|
!data/queries/
|
||||||
|
|||||||
64
README.md
64
README.md
@@ -3,11 +3,9 @@
|
|||||||
</p>
|
</p>
|
||||||
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<img src="https://img.shields.io/badge/Python-3.9%20%7C%203.10%20%7C%203.11%20%7C%203.12%20%7C%203.13-blue.svg" alt="Python Versions">
|
<img src="https://img.shields.io/badge/Python-3.9%2B-blue.svg" alt="Python 3.9+">
|
||||||
<img src="https://github.com/yichuan-w/LEANN/actions/workflows/build-and-publish.yml/badge.svg" alt="CI Status">
|
|
||||||
<img src="https://img.shields.io/badge/Platform-Ubuntu%20%7C%20macOS%20(ARM64%2FIntel)-lightgrey" alt="Platform">
|
|
||||||
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="MIT License">
|
<img src="https://img.shields.io/badge/License-MIT-green.svg" alt="MIT License">
|
||||||
<img src="https://img.shields.io/badge/MCP-Native%20Integration-blue" alt="MCP Integration">
|
<img src="https://img.shields.io/badge/Platform-Linux%20%7C%20macOS-lightgrey" alt="Platform">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
<h2 align="center" tabindex="-1" class="heading-element" dir="auto">
|
<h2 align="center" tabindex="-1" class="heading-element" dir="auto">
|
||||||
@@ -18,10 +16,7 @@ LEANN is an innovative vector database that democratizes personal AI. Transform
|
|||||||
|
|
||||||
LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)
|
LEANN achieves this through *graph-based selective recomputation* with *high-degree preserving pruning*, computing embeddings on-demand instead of storing them all. [Illustration Fig →](#️-architecture--how-it-works) | [Paper →](https://arxiv.org/abs/2506.08276)
|
||||||
|
|
||||||
**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can semantic search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, **[codebase](#-claude-code-integration-transform-your-development-workflow)**\* , or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
|
**Ready to RAG Everything?** Transform your laptop into a personal AI assistant that can search your **[file system](#-personal-data-manager-process-any-documents-pdf-txt-md)**, **[emails](#-your-personal-email-secretary-rag-on-apple-mail)**, **[browser history](#-time-machine-for-the-web-rag-your-entire-browser-history)**, **[chat history](#-wechat-detective-unlock-your-golden-memories)**, or external knowledge bases (i.e., 60M documents) - all on your laptop, with zero cloud costs and complete privacy.
|
||||||
|
|
||||||
|
|
||||||
\* Claude Code only supports basic `grep`-style keyword search. **LEANN** is a drop-in **semantic search MCP service fully compatible with Claude Code**, unlocking intelligent retrieval without changing your workflow. 🔥 Check out [the easy setup →](packages/leann-mcp/README.md)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -31,7 +26,7 @@ LEANN achieves this through *graph-based selective recomputation* with *high-deg
|
|||||||
<img src="assets/effects.png" alt="LEANN vs Traditional Vector DB Storage Comparison" width="70%">
|
<img src="assets/effects.png" alt="LEANN vs Traditional Vector DB Storage Comparison" width="70%">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
> **The numbers speak for themselves:** Index 60 million text chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#storage-comparison)
|
> **The numbers speak for themselves:** Index 60 million Wikipedia chunks in just 6GB instead of 201GB. From emails to browser history, everything fits on your laptop. [See detailed benchmarks for different applications below ↓](#storage-comparison)
|
||||||
|
|
||||||
|
|
||||||
🔒 **Privacy:** Your data never leaves your laptop. No OpenAI, no cloud, no "terms of service".
|
🔒 **Privacy:** Your data never leaves your laptop. No OpenAI, no cloud, no "terms of service".
|
||||||
@@ -171,12 +166,10 @@ ollama pull llama3.2:1b
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
### ⭐ Flexible Configuration
|
### Flexible Configuration
|
||||||
|
|
||||||
LEANN provides flexible parameters for embedding models, search strategies, and data processing to fit your specific needs.
|
LEANN provides flexible parameters for embedding models, search strategies, and data processing to fit your specific needs.
|
||||||
|
|
||||||
📚 **Need configuration best practices?** Check our [Configuration Guide](docs/configuration-guide.md) for detailed optimization tips, model selection advice, and solutions to common issues like slow embeddings or poor search quality.
|
|
||||||
|
|
||||||
<details>
|
<details>
|
||||||
<summary><strong>📋 Click to expand: Common Parameters (Available in All Examples)</strong></summary>
|
<summary><strong>📋 Click to expand: Common Parameters (Available in All Examples)</strong></summary>
|
||||||
|
|
||||||
@@ -190,13 +183,12 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
|
|||||||
--force-rebuild # Force rebuild index even if it exists
|
--force-rebuild # Force rebuild index even if it exists
|
||||||
|
|
||||||
# Embedding Parameters
|
# Embedding Parameters
|
||||||
--embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small, nomic-embed-text,mlx-community/Qwen3-Embedding-0.6B-8bit or nomic-embed-text
|
--embedding-model MODEL # e.g., facebook/contriever, text-embedding-3-small or mlx-community/multilingual-e5-base-mlx
|
||||||
--embedding-mode MODE # sentence-transformers, openai, mlx, or ollama
|
--embedding-mode MODE # sentence-transformers, openai, or mlx
|
||||||
|
|
||||||
# LLM Parameters (Text generation models)
|
# LLM Parameters (Text generation models)
|
||||||
--llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
|
--llm TYPE # LLM backend: openai, ollama, or hf (default: openai)
|
||||||
--llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
|
--llm-model MODEL # Model name (default: gpt-4o) e.g., gpt-4o-mini, llama3.2:1b, Qwen/Qwen2.5-1.5B-Instruct
|
||||||
--thinking-budget LEVEL # Thinking budget for reasoning models: low/medium/high (supported by o3, o3-mini, GPT-Oss:20b, and other reasoning models)
|
|
||||||
|
|
||||||
# Search Parameters
|
# Search Parameters
|
||||||
--top-k N # Number of results to retrieve (default: 20)
|
--top-k N # Number of results to retrieve (default: 20)
|
||||||
@@ -224,7 +216,7 @@ Ask questions directly about your personal PDFs, documents, and any directory co
|
|||||||
<img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
|
<img src="videos/paper_clear.gif" alt="LEANN Document Search Demo" width="600">
|
||||||
</p>
|
</p>
|
||||||
|
|
||||||
The example below asks a question about summarizing our paper (uses default data in `data/`, which is a directory with diverse data sources: two papers, Pride and Prejudice, and a Technical report about LLM in Huawei in Chinese), and this is the **easiest example** to run here:
|
The example below asks a question about summarizing our paper (uses default data in `data/`, which is a directory with diverse data sources: two papers, Pride and Prejudice, and a README in Chinese) and this is the **easiest example** to run here:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
source .venv/bin/activate # Don't forget to activate the virtual environment
|
source .venv/bin/activate # Don't forget to activate the virtual environment
|
||||||
@@ -419,26 +411,7 @@ Once the index is built, you can ask questions like:
|
|||||||
|
|
||||||
</details>
|
</details>
|
||||||
|
|
||||||
### 🚀 Claude Code Integration: Transform Your Development Workflow!
|
|
||||||
|
|
||||||
**The future of code assistance is here.** Transform your development workflow with LEANN's native MCP integration for Claude Code. Index your entire codebase and get intelligent code assistance directly in your IDE.
|
|
||||||
|
|
||||||
**Key features:**
|
|
||||||
- 🔍 **Semantic code search** across your entire project
|
|
||||||
- 📚 **Context-aware assistance** for debugging and development
|
|
||||||
- 🚀 **Zero-config setup** with automatic language detection
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Install LEANN globally for MCP integration
|
|
||||||
uv tool install leann-core
|
|
||||||
|
|
||||||
# Setup is automatic - just start using Claude Code!
|
|
||||||
```
|
|
||||||
Try our fully agentic pipeline with auto query rewriting, semantic search planning, and more:
|
|
||||||
|
|
||||||

|
|
||||||
|
|
||||||
**Ready to supercharge your coding?** [Complete Setup Guide →](packages/leann-mcp/README.md)
|
|
||||||
|
|
||||||
## 🖥️ Command Line Interface
|
## 🖥️ Command Line Interface
|
||||||
|
|
||||||
@@ -452,7 +425,7 @@ source .venv/bin/activate
|
|||||||
leann --help
|
leann --help
|
||||||
```
|
```
|
||||||
|
|
||||||
**To make it globally available:**
|
**To make it globally available (recommended for daily use):**
|
||||||
```bash
|
```bash
|
||||||
# Install the LEANN CLI globally using uv tool
|
# Install the LEANN CLI globally using uv tool
|
||||||
uv tool install leann
|
uv tool install leann
|
||||||
@@ -461,15 +434,13 @@ uv tool install leann
|
|||||||
leann --help
|
leann --help
|
||||||
```
|
```
|
||||||
|
|
||||||
> **Note**: Global installation is required for Claude Code integration. The `leann_mcp` server depends on the globally available `leann` command.
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Usage Examples
|
### Usage Examples
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# build from a specific directory, and my_docs is the index name(Here you can also build from multiple dict or multiple files)
|
# Build an index from documents
|
||||||
leann build my-docs --docs ./your_documents
|
leann build my-docs --docs ./documents
|
||||||
|
|
||||||
# Search your documents
|
# Search your documents
|
||||||
leann search my-docs "machine learning concepts"
|
leann search my-docs "machine learning concepts"
|
||||||
@@ -543,7 +514,7 @@ Options:
|
|||||||
- **Dynamic batching:** Efficiently batch embedding computations for GPU utilization
|
- **Dynamic batching:** Efficiently batch embedding computations for GPU utilization
|
||||||
- **Two-level search:** Smart graph traversal that prioritizes promising nodes
|
- **Two-level search:** Smart graph traversal that prioritizes promising nodes
|
||||||
|
|
||||||
**Backends:** HNSW (default) for most use cases, with optional DiskANN support for billion-scale datasets.
|
**Backends:** DiskANN or HNSW - pick what works for your data size.
|
||||||
|
|
||||||
## Benchmarks
|
## Benchmarks
|
||||||
|
|
||||||
@@ -563,7 +534,8 @@ Options:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
uv pip install -e ".[dev]" # Install dev dependencies
|
uv pip install -e ".[dev]" # Install dev dependencies
|
||||||
python benchmarks/run_evaluation.py # Will auto-download evaluation data and run benchmarks
|
python benchmarks/run_evaluation.py data/indices/dpr/dpr_diskann # DPR dataset
|
||||||
|
python benchmarks/run_evaluation.py data/indices/rpj_wiki/rpj_wiki.index # Wikipedia
|
||||||
```
|
```
|
||||||
|
|
||||||
The evaluation script downloads data automatically on first run. The last three results were tested with partial personal data, and you can reproduce them with your own data!
|
The evaluation script downloads data automatically on first run. The last three results were tested with partial personal data, and you can reproduce them with your own data!
|
||||||
@@ -601,15 +573,11 @@ MIT License - see [LICENSE](LICENSE) for details.
|
|||||||
|
|
||||||
## 🙏 Acknowledgments
|
## 🙏 Acknowledgments
|
||||||
|
|
||||||
Core Contributors: [Yichuan Wang](https://yichuan-w.github.io/) & [Zhifei Li](https://github.com/andylizf).
|
This work is done at [**Berkeley Sky Computing Lab**](https://sky.cs.berkeley.edu/).
|
||||||
|
|
||||||
We welcome more contributors! Feel free to open issues or submit PRs.
|
|
||||||
|
|
||||||
This work is done at [**Berkeley Sky Computing Lab**](https://sky.cs.berkeley.edu/).
|
---
|
||||||
|
|
||||||
## Star History
|
|
||||||
|
|
||||||
[](https://www.star-history.com/#yichuan-w/LEANN&Date)
|
|
||||||
<p align="center">
|
<p align="center">
|
||||||
<strong>⭐ Star us on GitHub if Leann is useful for your research or applications!</strong>
|
<strong>⭐ Star us on GitHub if Leann is useful for your research or applications!</strong>
|
||||||
</p>
|
</p>
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ class BaseRAGExample(ABC):
|
|||||||
"--embedding-mode",
|
"--embedding-mode",
|
||||||
type=str,
|
type=str,
|
||||||
default="sentence-transformers",
|
default="sentence-transformers",
|
||||||
choices=["sentence-transformers", "openai", "mlx", "ollama"],
|
choices=["sentence-transformers", "openai", "mlx"],
|
||||||
help="Embedding backend mode (default: sentence-transformers)",
|
help="Embedding backend mode (default: sentence-transformers)",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -85,7 +85,7 @@ class BaseRAGExample(ABC):
|
|||||||
"--llm",
|
"--llm",
|
||||||
type=str,
|
type=str,
|
||||||
default="openai",
|
default="openai",
|
||||||
choices=["openai", "ollama", "hf", "simulated"],
|
choices=["openai", "ollama", "hf"],
|
||||||
help="LLM backend to use (default: openai)",
|
help="LLM backend to use (default: openai)",
|
||||||
)
|
)
|
||||||
llm_group.add_argument(
|
llm_group.add_argument(
|
||||||
@@ -100,13 +100,6 @@ class BaseRAGExample(ABC):
|
|||||||
default="http://localhost:11434",
|
default="http://localhost:11434",
|
||||||
help="Host for Ollama API (default: http://localhost:11434)",
|
help="Host for Ollama API (default: http://localhost:11434)",
|
||||||
)
|
)
|
||||||
llm_group.add_argument(
|
|
||||||
"--thinking-budget",
|
|
||||||
type=str,
|
|
||||||
choices=["low", "medium", "high"],
|
|
||||||
default=None,
|
|
||||||
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Search parameters
|
# Search parameters
|
||||||
search_group = parser.add_argument_group("Search Parameters")
|
search_group = parser.add_argument_group("Search Parameters")
|
||||||
@@ -235,17 +228,7 @@ class BaseRAGExample(ABC):
|
|||||||
if not query:
|
if not query:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Prepare LLM kwargs with thinking budget if specified
|
response = chat.ask(query, top_k=args.top_k, complexity=args.search_complexity)
|
||||||
llm_kwargs = {}
|
|
||||||
if hasattr(args, "thinking_budget") and args.thinking_budget:
|
|
||||||
llm_kwargs["thinking_budget"] = args.thinking_budget
|
|
||||||
|
|
||||||
response = chat.ask(
|
|
||||||
query,
|
|
||||||
top_k=args.top_k,
|
|
||||||
complexity=args.search_complexity,
|
|
||||||
llm_kwargs=llm_kwargs,
|
|
||||||
)
|
|
||||||
print(f"\nAssistant: {response}\n")
|
print(f"\nAssistant: {response}\n")
|
||||||
|
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
@@ -264,15 +247,7 @@ class BaseRAGExample(ABC):
|
|||||||
)
|
)
|
||||||
|
|
||||||
print(f"\n[Query]: \033[36m{query}\033[0m")
|
print(f"\n[Query]: \033[36m{query}\033[0m")
|
||||||
|
response = chat.ask(query, top_k=args.top_k, complexity=args.search_complexity)
|
||||||
# Prepare LLM kwargs with thinking budget if specified
|
|
||||||
llm_kwargs = {}
|
|
||||||
if hasattr(args, "thinking_budget") and args.thinking_budget:
|
|
||||||
llm_kwargs["thinking_budget"] = args.thinking_budget
|
|
||||||
|
|
||||||
response = chat.ask(
|
|
||||||
query, top_k=args.top_k, complexity=args.search_complexity, llm_kwargs=llm_kwargs
|
|
||||||
)
|
|
||||||
print(f"\n[Response]: \033[36m{response}\033[0m")
|
print(f"\n[Response]: \033[36m{response}\033[0m")
|
||||||
|
|
||||||
async def run(self):
|
async def run(self):
|
||||||
|
|||||||
@@ -99,9 +99,7 @@ if __name__ == "__main__":
|
|||||||
print("- 'What are the main techniques LEANN uses?'")
|
print("- 'What are the main techniques LEANN uses?'")
|
||||||
print("- 'What is the technique DLPM?'")
|
print("- 'What is the technique DLPM?'")
|
||||||
print("- 'Who does Elizabeth Bennet marry?'")
|
print("- 'Who does Elizabeth Bennet marry?'")
|
||||||
print(
|
print("- 'What is the problem of developing pan gu model? (盘古大模型开发中遇到什么问题?)'")
|
||||||
"- 'What is the problem of developing pan gu model Huawei meets? (盘古大模型开发中遇到什么问题?)'"
|
|
||||||
)
|
|
||||||
print("\nOr run without --query for interactive mode\n")
|
print("\nOr run without --query for interactive mode\n")
|
||||||
|
|
||||||
rag = DocumentRAG()
|
rag = DocumentRAG()
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 73 KiB |
Binary file not shown.
|
Before Width: | Height: | Size: 224 KiB |
@@ -1,123 +0,0 @@
|
|||||||
# Thinking Budget Feature Implementation
|
|
||||||
|
|
||||||
## Overview
|
|
||||||
|
|
||||||
This document describes the implementation of the **thinking budget** feature for LEANN, which allows users to control the computational effort for reasoning models like GPT-Oss:20b.
|
|
||||||
|
|
||||||
## Feature Description
|
|
||||||
|
|
||||||
The thinking budget feature provides three levels of computational effort for reasoning models:
|
|
||||||
- **`low`**: Fast responses, basic reasoning (default for simple queries)
|
|
||||||
- **`medium`**: Balanced speed and reasoning depth
|
|
||||||
- **`high`**: Maximum reasoning effort, best for complex analytical questions
|
|
||||||
|
|
||||||
## Implementation Details
|
|
||||||
|
|
||||||
### 1. Command Line Interface
|
|
||||||
|
|
||||||
Added `--thinking-budget` parameter to both CLI and RAG examples:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# LEANN CLI
|
|
||||||
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
|
|
||||||
|
|
||||||
# RAG Examples
|
|
||||||
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
|
|
||||||
python apps/document_rag.py --llm openai --llm-model o3 --thinking-budget medium
|
|
||||||
```
|
|
||||||
|
|
||||||
### 2. LLM Backend Support
|
|
||||||
|
|
||||||
#### Ollama Backend (`packages/leann-core/src/leann/chat.py`)
|
|
||||||
|
|
||||||
```python
|
|
||||||
def ask(self, prompt: str, **kwargs) -> str:
|
|
||||||
# Handle thinking budget for reasoning models
|
|
||||||
options = kwargs.copy()
|
|
||||||
thinking_budget = kwargs.get("thinking_budget")
|
|
||||||
if thinking_budget:
|
|
||||||
options.pop("thinking_budget", None)
|
|
||||||
if thinking_budget in ["low", "medium", "high"]:
|
|
||||||
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
|
|
||||||
```
|
|
||||||
|
|
||||||
**API Format**: Uses Ollama's `reasoning` parameter with `effort` and `exclude` fields.
|
|
||||||
|
|
||||||
#### OpenAI Backend (`packages/leann-core/src/leann/chat.py`)
|
|
||||||
|
|
||||||
```python
|
|
||||||
def ask(self, prompt: str, **kwargs) -> str:
|
|
||||||
# Handle thinking budget for reasoning models
|
|
||||||
thinking_budget = kwargs.get("thinking_budget")
|
|
||||||
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
|
|
||||||
# Check if this is an o-series model
|
|
||||||
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
|
|
||||||
if any(model in self.model for model in o_series_models):
|
|
||||||
params["reasoning_effort"] = thinking_budget
|
|
||||||
```
|
|
||||||
|
|
||||||
**API Format**: Uses OpenAI's `reasoning_effort` parameter for o-series models.
|
|
||||||
|
|
||||||
### 3. Parameter Propagation
|
|
||||||
|
|
||||||
The thinking budget parameter is properly propagated through the LEANN architecture:
|
|
||||||
|
|
||||||
1. **CLI** (`packages/leann-core/src/leann/cli.py`): Captures `--thinking-budget` argument
|
|
||||||
2. **Base RAG** (`apps/base_rag_example.py`): Adds parameter to argument parser
|
|
||||||
3. **LeannChat** (`packages/leann-core/src/leann/api.py`): Passes `llm_kwargs` to LLM
|
|
||||||
4. **LLM Interface**: Handles the parameter in backend-specific implementations
|
|
||||||
|
|
||||||
## Files Modified
|
|
||||||
|
|
||||||
### Core Implementation
|
|
||||||
- `packages/leann-core/src/leann/chat.py`: Added thinking budget support to OllamaChat and OpenAIChat
|
|
||||||
- `packages/leann-core/src/leann/cli.py`: Added `--thinking-budget` argument
|
|
||||||
- `apps/base_rag_example.py`: Added thinking budget parameter to RAG examples
|
|
||||||
|
|
||||||
### Documentation
|
|
||||||
- `README.md`: Added thinking budget parameter to usage examples
|
|
||||||
- `docs/configuration-guide.md`: Added detailed documentation and usage guidelines
|
|
||||||
|
|
||||||
### Examples
|
|
||||||
- `examples/thinking_budget_demo.py`: Comprehensive demo script with usage examples
|
|
||||||
|
|
||||||
## Usage Examples
|
|
||||||
|
|
||||||
### Basic Usage
|
|
||||||
```bash
|
|
||||||
# High reasoning effort for complex questions
|
|
||||||
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget high
|
|
||||||
|
|
||||||
# Medium reasoning for balanced performance
|
|
||||||
leann ask my-index --llm openai --model gpt-4o --thinking-budget medium
|
|
||||||
|
|
||||||
# Low reasoning for fast responses
|
|
||||||
leann ask my-index --llm ollama --model gpt-oss:20b --thinking-budget low
|
|
||||||
```
|
|
||||||
|
|
||||||
### RAG Examples
|
|
||||||
```bash
|
|
||||||
# Email RAG with high reasoning
|
|
||||||
python apps/email_rag.py --llm ollama --llm-model gpt-oss:20b --thinking-budget high
|
|
||||||
|
|
||||||
# Document RAG with medium reasoning
|
|
||||||
python apps/document_rag.py --llm openai --llm-model gpt-4o --thinking-budget medium
|
|
||||||
```
|
|
||||||
|
|
||||||
## Supported Models
|
|
||||||
|
|
||||||
### Ollama Models
|
|
||||||
- **GPT-Oss:20b**: Primary target model with reasoning capabilities
|
|
||||||
- **Other reasoning models**: Any Ollama model that supports the `reasoning` parameter
|
|
||||||
|
|
||||||
### OpenAI Models
|
|
||||||
- **o3, o3-mini, o4-mini, o1**: o-series reasoning models with `reasoning_effort` parameter
|
|
||||||
- **GPT-OSS models**: Models that support reasoning capabilities
|
|
||||||
|
|
||||||
## Testing
|
|
||||||
|
|
||||||
The implementation includes comprehensive testing:
|
|
||||||
- Parameter handling verification
|
|
||||||
- Backend-specific API format validation
|
|
||||||
- CLI argument parsing tests
|
|
||||||
- Integration with existing LEANN architecture
|
|
||||||
@@ -1,285 +0,0 @@
|
|||||||
# LEANN Configuration Guide
|
|
||||||
|
|
||||||
This guide helps you optimize LEANN for different use cases and understand the trade-offs between various configuration options.
|
|
||||||
|
|
||||||
## Getting Started: Simple is Better
|
|
||||||
|
|
||||||
When first trying LEANN, start with a small dataset to quickly validate your approach:
|
|
||||||
|
|
||||||
**For document RAG**: The default `data/` directory works perfectly - it includes two AI research papers, the novel *Pride and Prejudice*, and a technical report
|
|
||||||
```bash
|
|
||||||
python -m apps.document_rag --query "What techniques does LEANN use?"
|
|
||||||
```
|
|
||||||
|
|
||||||
**For other data sources**: Limit the dataset size for quick testing
|
|
||||||
```bash
|
|
||||||
# WeChat: Test with recent messages only
|
|
||||||
python -m apps.wechat_rag --max-items 100 --query "What did we discuss about the project timeline?"
|
|
||||||
|
|
||||||
# Browser history: Last few days
|
|
||||||
python -m apps.browser_rag --max-items 500 --query "Find documentation about vector databases"
|
|
||||||
|
|
||||||
# Email: Recent inbox
|
|
||||||
python -m apps.email_rag --max-items 200 --query "Who sent updates about the deployment status?"
|
|
||||||
```
|
|
||||||
|
|
||||||
Once validated, scale up gradually:
|
|
||||||
- 100 documents → 1,000 → 10,000 → full dataset (`--max-items -1`)
|
|
||||||
- This helps identify issues early before committing to long processing times
|
|
||||||
|
|
||||||
## Embedding Model Selection: Understanding the Trade-offs
|
|
||||||
|
|
||||||
Based on our experience developing LEANN, embedding models fall into three categories:
|
|
||||||
|
|
||||||
### Small Models (< 100M parameters)
|
|
||||||
**Example**: `sentence-transformers/all-MiniLM-L6-v2` (22M params)
|
|
||||||
- **Pros**: Lightweight, fast for both indexing and inference
|
|
||||||
- **Cons**: Lower semantic understanding, may miss nuanced relationships
|
|
||||||
- **Use when**: Speed is critical, handling simple queries, interactive mode, or just experimenting with LEANN. If time is not a constraint, consider using a larger/better embedding model
|
|
||||||
|
|
||||||
### Medium Models (100M-500M parameters)
|
|
||||||
**Example**: `facebook/contriever` (110M params), `BAAI/bge-base-en-v1.5` (110M params)
|
|
||||||
- **Pros**: Balanced performance, good multilingual support, reasonable speed
|
|
||||||
- **Cons**: Requires more compute than small models
|
|
||||||
- **Use when**: Need quality results without extreme compute requirements, general-purpose RAG applications
|
|
||||||
|
|
||||||
### Large Models (500M+ parameters)
|
|
||||||
**Example**: `Qwen/Qwen3-Embedding-0.6B` (600M params), `intfloat/multilingual-e5-large` (560M params)
|
|
||||||
- **Pros**: Best semantic understanding, captures complex relationships, excellent multilingual support. **Qwen3-Embedding-0.6B achieves nearly OpenAI API performance!**
|
|
||||||
- **Cons**: Slower inference, longer index build times
|
|
||||||
- **Use when**: Quality is paramount and you have sufficient compute resources. **Highly recommended** for production use
|
|
||||||
|
|
||||||
### Quick Start: Cloud and Local Embedding Options
|
|
||||||
|
|
||||||
**OpenAI Embeddings (Fastest Setup)**
|
|
||||||
For immediate testing without local model downloads:
|
|
||||||
```bash
|
|
||||||
# Set OpenAI embeddings (requires OPENAI_API_KEY)
|
|
||||||
--embedding-mode openai --embedding-model text-embedding-3-small
|
|
||||||
```
|
|
||||||
|
|
||||||
**Ollama Embeddings (Privacy-Focused)**
|
|
||||||
For local embeddings with complete privacy:
|
|
||||||
```bash
|
|
||||||
# First, pull an embedding model
|
|
||||||
ollama pull nomic-embed-text
|
|
||||||
|
|
||||||
# Use Ollama embeddings
|
|
||||||
--embedding-mode ollama --embedding-model nomic-embed-text
|
|
||||||
```
|
|
||||||
|
|
||||||
<details>
|
|
||||||
<summary><strong>Cloud vs Local Trade-offs</strong></summary>
|
|
||||||
|
|
||||||
**OpenAI Embeddings** (`text-embedding-3-small/large`)
|
|
||||||
- **Pros**: No local compute needed, consistently fast, high quality
|
|
||||||
- **Cons**: Requires API key, costs money, data leaves your system, [known limitations with certain languages](https://yichuan-w.github.io/blog/lessons_learned_in_dev_leann/)
|
|
||||||
- **When to use**: Prototyping, non-sensitive data, need immediate results
|
|
||||||
|
|
||||||
**Local Embeddings**
|
|
||||||
- **Pros**: Complete privacy, no ongoing costs, full control, can sometimes outperform OpenAI embeddings
|
|
||||||
- **Cons**: Slower than cloud APIs, requires local compute resources
|
|
||||||
- **When to use**: Production systems, sensitive data, cost-sensitive applications
|
|
||||||
|
|
||||||
</details>
|
|
||||||
|
|
||||||
## Index Selection: Matching Your Scale
|
|
||||||
|
|
||||||
### HNSW (Hierarchical Navigable Small World)
|
|
||||||
**Best for**: Small to medium datasets (< 10M vectors) - **Default and recommended for extremely low storage**
|
|
||||||
- Full recomputation required
|
|
||||||
- High memory usage during build phase
|
|
||||||
- Excellent recall (95%+)
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Optimal for most use cases
|
|
||||||
--backend-name hnsw --graph-degree 32 --build-complexity 64
|
|
||||||
```
|
|
||||||
|
|
||||||
### DiskANN
|
|
||||||
**Best for**: Large datasets (> 10M vectors, 10GB+ index size) - **⚠️ Beta version, still in active development**
|
|
||||||
- Uses Product Quantization (PQ) for coarse filtering during graph traversal
|
|
||||||
- Novel approach: stores only PQ codes, performs rerank with exact computation in final step
|
|
||||||
- Implements a corner case of double-queue: prunes all neighbors and recomputes at the end
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# For billion-scale deployments
|
|
||||||
--backend-name diskann --graph-degree 64 --build-complexity 128
|
|
||||||
```
|
|
||||||
|
|
||||||
## LLM Selection: Engine and Model Comparison
|
|
||||||
|
|
||||||
### LLM Engines
|
|
||||||
|
|
||||||
**OpenAI** (`--llm openai`)
|
|
||||||
- **Pros**: Best quality, consistent performance, no local resources needed
|
|
||||||
- **Cons**: Costs money ($0.15-2.5 per million tokens), requires internet, data privacy concerns
|
|
||||||
- **Models**: `gpt-4o-mini` (fast, cheap), `gpt-4o` (best quality), `o3` (reasoning), `o3-mini` (reasoning, cheaper)
|
|
||||||
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for o-series reasoning models (o3, o3-mini, o4-mini)
|
|
||||||
- **Note**: Our current default, but we recommend switching to Ollama for most use cases
|
|
||||||
|
|
||||||
**Ollama** (`--llm ollama`)
|
|
||||||
- **Pros**: Fully local, free, privacy-preserving, good model variety
|
|
||||||
- **Cons**: Requires local GPU/CPU resources, is slower than cloud APIs, and requires installing the separate [Ollama app](https://github.com/ollama/ollama?tab=readme-ov-file#ollama) and pre-downloading models with `ollama pull`
|
|
||||||
- **Models**: `qwen3:0.6b` (ultra-fast), `qwen3:1.7b` (balanced), `qwen3:4b` (good quality), `qwen3:7b` (high quality), `deepseek-r1:1.5b` (reasoning)
|
|
||||||
- **Thinking Budget**: Use `--thinking-budget low/medium/high` for reasoning models like GPT-Oss:20b
|
|
||||||
|
|
||||||
**HuggingFace** (`--llm hf`)
|
|
||||||
- **Pros**: Free tier available, huge model selection, direct model loading (vs Ollama's server-based approach)
|
|
||||||
- **Cons**: More complex initial setup
|
|
||||||
- **Models**: `Qwen/Qwen3-1.7B-FP8`
|
|
||||||
|
|
||||||
## Parameter Tuning Guide
|
|
||||||
|
|
||||||
### Search Complexity Parameters
|
|
||||||
|
|
||||||
**`--build-complexity`** (index building)
|
|
||||||
- Controls thoroughness during index construction
|
|
||||||
- Higher = better recall but slower build
|
|
||||||
- Recommendations:
|
|
||||||
- 32: Quick prototyping
|
|
||||||
- 64: Balanced (default)
|
|
||||||
- 128: Production systems
|
|
||||||
- 256: Maximum quality
|
|
||||||
|
|
||||||
**`--search-complexity`** (query time)
|
|
||||||
- Controls search thoroughness
|
|
||||||
- Higher = better results but slower
|
|
||||||
- Recommendations:
|
|
||||||
- 16: Fast/Interactive search
|
|
||||||
- 32: High quality with diversity
|
|
||||||
- 64+: Maximum accuracy
|
|
||||||
|
|
||||||
### Top-K Selection
|
|
||||||
|
|
||||||
**`--top-k`** (number of retrieved chunks)
|
|
||||||
- More chunks = better context but slower LLM processing
|
|
||||||
- Should always be smaller than `--search-complexity`
|
|
||||||
- Guidelines:
|
|
||||||
- 10-20: General questions (default: 20)
|
|
||||||
- 30+: Complex multi-hop reasoning requiring comprehensive context
|
|
||||||
|
|
||||||
**Trade-off formula**:
|
|
||||||
- Retrieval time ∝ log(n) × search_complexity
|
|
||||||
- LLM processing time ∝ top_k × chunk_size
|
|
||||||
- Total context = top_k × chunk_size tokens
|
|
||||||
|
|
||||||
### Thinking Budget for Reasoning Models
|
|
||||||
|
|
||||||
**`--thinking-budget`** (reasoning effort level)
|
|
||||||
- Controls the computational effort for reasoning models
|
|
||||||
- Options: `low`, `medium`, `high`
|
|
||||||
- Guidelines:
|
|
||||||
- `low`: Fast responses, basic reasoning (default for simple queries)
|
|
||||||
- `medium`: Balanced speed and reasoning depth
|
|
||||||
- `high`: Maximum reasoning effort, best for complex analytical questions
|
|
||||||
- **Supported Models**:
|
|
||||||
- **Ollama**: `gpt-oss:20b`, `gpt-oss:120b`
|
|
||||||
- **OpenAI**: `o3`, `o3-mini`, `o4-mini`, `o1` (o-series reasoning models)
|
|
||||||
- **Note**: Models without reasoning support will show a warning and proceed without reasoning parameters
|
|
||||||
- **Example**: `--thinking-budget high` for complex analytical questions
|
|
||||||
|
|
||||||
**📖 For detailed usage examples and implementation details, check out [Thinking Budget Documentation](THINKING_BUDGET_FEATURE.md)**
|
|
||||||
|
|
||||||
**💡 Quick Examples:**
|
|
||||||
```bash
|
|
||||||
# OpenAI o-series reasoning model
|
|
||||||
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
|
|
||||||
--index-dir hnswbuild --backend hnsw \
|
|
||||||
--llm openai --llm-model o3 --thinking-budget medium
|
|
||||||
|
|
||||||
# Ollama reasoning model
|
|
||||||
python apps/document_rag.py --query "What are the main techniques LEANN explores?" \
|
|
||||||
--index-dir hnswbuild --backend hnsw \
|
|
||||||
--llm ollama --llm-model gpt-oss:20b --thinking-budget high
|
|
||||||
```
|
|
||||||
|
|
||||||
### Graph Degree (HNSW/DiskANN)
|
|
||||||
|
|
||||||
**`--graph-degree`**
|
|
||||||
- Number of connections per node in the graph
|
|
||||||
- Higher = better recall but more memory
|
|
||||||
- HNSW: 16-32 (default: 32)
|
|
||||||
- DiskANN: 32-128 (default: 64)
|
|
||||||
|
|
||||||
|
|
||||||
## Performance Optimization Checklist
|
|
||||||
|
|
||||||
### If Embedding is Too Slow
|
|
||||||
|
|
||||||
1. **Switch to smaller model**:
|
|
||||||
```bash
|
|
||||||
# From large model
|
|
||||||
--embedding-model Qwen/Qwen3-Embedding-0.6B
|
|
||||||
# To small model
|
|
||||||
--embedding-model sentence-transformers/all-MiniLM-L6-v2
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Limit dataset size for testing**:
|
|
||||||
```bash
|
|
||||||
--max-items 1000 # Process first 1k items only
|
|
||||||
```
|
|
||||||
|
|
||||||
3. **Use MLX on Apple Silicon** (optional optimization):
|
|
||||||
```bash
|
|
||||||
--embedding-mode mlx --embedding-model mlx-community/Qwen3-Embedding-0.6B-8bit
|
|
||||||
```
|
|
||||||
MLX might not be the best choice: in our tests it offered only a 1.3x speedup over HF, so Ollama may be a better option for embedding generation.
|
|
||||||
|
|
||||||
4. **Use Ollama**
|
|
||||||
```bash
|
|
||||||
--embedding-mode ollama --embedding-model nomic-embed-text
|
|
||||||
```
|
|
||||||
To discover additional embedding models in Ollama, check out https://ollama.com/search?c=embedding or read more about embedding models at https://ollama.com/blog/embedding-models. Be sure to check which model size works best for you.
|
|
||||||
### If Search Quality is Poor
|
|
||||||
|
|
||||||
1. **Increase retrieval count**:
|
|
||||||
```bash
|
|
||||||
--top-k 30 # Retrieve more candidates
|
|
||||||
```
|
|
||||||
|
|
||||||
2. **Upgrade embedding model**:
|
|
||||||
```bash
|
|
||||||
# For English
|
|
||||||
--embedding-model BAAI/bge-base-en-v1.5
|
|
||||||
# For multilingual
|
|
||||||
--embedding-model intfloat/multilingual-e5-large
|
|
||||||
```
|
|
||||||
|
|
||||||
## Understanding the Trade-offs
|
|
||||||
|
|
||||||
Every configuration choice involves trade-offs:
|
|
||||||
|
|
||||||
| Factor | Small/Fast | Large/Quality |
|
|
||||||
|--------|------------|---------------|
|
|
||||||
| Embedding Model | `all-MiniLM-L6-v2` | `Qwen/Qwen3-Embedding-0.6B` |
|
|
||||||
| Chunk Size | 512 tokens | 128 tokens |
|
|
||||||
| Index Type | HNSW | DiskANN |
|
|
||||||
| LLM | `qwen3:1.7b` | `gpt-4o` |
|
|
||||||
|
|
||||||
The key is finding the right balance for your specific use case. Start small and simple, measure performance, then scale up only where needed.
|
|
||||||
|
|
||||||
## Deep Dive: Critical Configuration Decisions
|
|
||||||
|
|
||||||
### When to Disable Recomputation
|
|
||||||
|
|
||||||
LEANN's recomputation feature provides exact distance calculations but can be disabled for extreme QPS requirements:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
--no-recompute # Disable selective recomputation
|
|
||||||
```
|
|
||||||
|
|
||||||
**Trade-offs**:
|
|
||||||
- **With recomputation** (default): Exact distances, best quality, higher latency, minimal storage (only stores metadata, recomputes embeddings on-demand)
|
|
||||||
- **Without recomputation**: Must store full embeddings, significantly higher memory and storage usage (10-100x more), but faster search
|
|
||||||
|
|
||||||
**Disable when**:
|
|
||||||
- You have abundant storage and memory
|
|
||||||
- Need extremely low latency (< 100ms)
|
|
||||||
- Running a read-heavy workload where storage cost is acceptable
|
|
||||||
|
|
||||||
## Further Reading
|
|
||||||
|
|
||||||
- [Lessons Learned Developing LEANN](https://yichuan-w.github.io/blog/lessons_learned_in_dev_leann/)
|
|
||||||
- [LEANN Technical Paper](https://arxiv.org/abs/2506.08276)
|
|
||||||
- [DiskANN Original Paper](https://papers.nips.cc/paper/2019/file/09853c7fb1d3f8ee67a61b6bf4a7f8e6-Paper.pdf)
|
|
||||||
@@ -5,7 +5,7 @@
|
|||||||
- **🔄 Real-time Embeddings** - Eliminate heavy embedding storage with dynamic computation using optimized ZMQ servers and highly optimized search paradigm (overlapping and batching) with highly optimized embedding engine
|
- **🔄 Real-time Embeddings** - Eliminate heavy embedding storage with dynamic computation using optimized ZMQ servers and highly optimized search paradigm (overlapping and batching) with highly optimized embedding engine
|
||||||
- **📈 Scalable Architecture** - Handles millions of documents on consumer hardware; the larger your dataset, the more LEANN can save
|
- **📈 Scalable Architecture** - Handles millions of documents on consumer hardware; the larger your dataset, the more LEANN can save
|
||||||
- **🎯 Graph Pruning** - Advanced techniques to minimize the storage overhead of vector search to a limited footprint
|
- **🎯 Graph Pruning** - Advanced techniques to minimize the storage overhead of vector search to a limited footprint
|
||||||
- **🏗️ Pluggable Backends** - HNSW/FAISS (default), with optional DiskANN for large-scale deployments
|
- **🏗️ Pluggable Backends** - DiskANN, HNSW/FAISS with unified API
|
||||||
|
|
||||||
## 🛠️ Technical Highlights
|
## 🛠️ Technical Highlights
|
||||||
- **🔄 Recompute Mode** - Highest accuracy scenarios while eliminating vector storage overhead
|
- **🔄 Recompute Mode** - Highest accuracy scenarios while eliminating vector storage overhead
|
||||||
|
|||||||
@@ -2,8 +2,8 @@
|
|||||||
|
|
||||||
## 🎯 Q2 2025
|
## 🎯 Q2 2025
|
||||||
|
|
||||||
- [X] HNSW backend integration
|
|
||||||
- [X] DiskANN backend with MIPS/L2/Cosine support
|
- [X] DiskANN backend with MIPS/L2/Cosine support
|
||||||
|
- [X] HNSW backend integration
|
||||||
- [X] Real-time embedding pipeline
|
- [X] Real-time embedding pipeline
|
||||||
- [X] Memory-efficient graph pruning
|
- [X] Memory-efficient graph pruning
|
||||||
|
|
||||||
|
|||||||
8
packages/leann-backend-diskann/CMakeLists.txt
Normal file
8
packages/leann-backend-diskann/CMakeLists.txt
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
# packages/leann-backend-diskann/CMakeLists.txt (simplified version)
|
||||||
|
|
||||||
|
cmake_minimum_required(VERSION 3.20)
|
||||||
|
project(leann_backend_diskann_wrapper)
|
||||||
|
|
||||||
|
# Tell CMake to directly enter the DiskANN submodule and execute its own CMakeLists.txt
|
||||||
|
# DiskANN will handle everything itself, including compiling Python bindings
|
||||||
|
add_subdirectory(src/third_party/DiskANN)
|
||||||
@@ -4,10 +4,9 @@ import os
|
|||||||
import struct
|
import struct
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import psutil
|
|
||||||
from leann.interface import (
|
from leann.interface import (
|
||||||
LeannBackendBuilderInterface,
|
LeannBackendBuilderInterface,
|
||||||
LeannBackendFactoryInterface,
|
LeannBackendFactoryInterface,
|
||||||
@@ -85,43 +84,6 @@ def _write_vectors_to_bin(data: np.ndarray, file_path: Path):
|
|||||||
f.write(data.tobytes())
|
f.write(data.tobytes())
|
||||||
|
|
||||||
|
|
||||||
def _calculate_smart_memory_config(data: np.ndarray) -> tuple[float, float]:
|
|
||||||
"""
|
|
||||||
Calculate smart memory configuration for DiskANN based on data size and system specs.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
data: The embedding data array
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
tuple: (search_memory_maximum, build_memory_maximum) in GB
|
|
||||||
"""
|
|
||||||
num_vectors, dim = data.shape
|
|
||||||
|
|
||||||
# Calculate embedding storage size
|
|
||||||
embedding_size_bytes = num_vectors * dim * 4 # float32 = 4 bytes
|
|
||||||
embedding_size_gb = embedding_size_bytes / (1024**3)
|
|
||||||
|
|
||||||
# search_memory_maximum: 1/10 of embedding size for optimal PQ compression
|
|
||||||
# This controls Product Quantization size - smaller means more compression
|
|
||||||
search_memory_gb = max(0.1, embedding_size_gb / 10) # At least 100MB
|
|
||||||
|
|
||||||
# build_memory_maximum: Based on available system RAM for sharding control
|
|
||||||
# This controls how much memory DiskANN uses during index construction
|
|
||||||
available_memory_gb = psutil.virtual_memory().available / (1024**3)
|
|
||||||
total_memory_gb = psutil.virtual_memory().total / (1024**3)
|
|
||||||
|
|
||||||
# Use 50% of available memory, but at least 2GB and at most 75% of total
|
|
||||||
build_memory_gb = max(2.0, min(available_memory_gb * 0.5, total_memory_gb * 0.75))
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"Smart memory config - Data: {embedding_size_gb:.2f}GB, "
|
|
||||||
f"Search mem: {search_memory_gb:.2f}GB (PQ control), "
|
|
||||||
f"Build mem: {build_memory_gb:.2f}GB (sharding control)"
|
|
||||||
)
|
|
||||||
|
|
||||||
return search_memory_gb, build_memory_gb
|
|
||||||
|
|
||||||
|
|
||||||
@register_backend("diskann")
|
@register_backend("diskann")
|
||||||
class DiskannBackend(LeannBackendFactoryInterface):
|
class DiskannBackend(LeannBackendFactoryInterface):
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -159,16 +121,6 @@ class DiskannBuilder(LeannBackendBuilderInterface):
|
|||||||
f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
|
f"Unsupported distance_metric '{build_kwargs.get('distance_metric', 'unknown')}'."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Calculate smart memory configuration if not explicitly provided
|
|
||||||
if (
|
|
||||||
"search_memory_maximum" not in build_kwargs
|
|
||||||
or "build_memory_maximum" not in build_kwargs
|
|
||||||
):
|
|
||||||
smart_search_mem, smart_build_mem = _calculate_smart_memory_config(data)
|
|
||||||
else:
|
|
||||||
smart_search_mem = build_kwargs.get("search_memory_maximum", 4.0)
|
|
||||||
smart_build_mem = build_kwargs.get("build_memory_maximum", 8.0)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from . import _diskannpy as diskannpy # type: ignore
|
from . import _diskannpy as diskannpy # type: ignore
|
||||||
|
|
||||||
@@ -179,8 +131,8 @@ class DiskannBuilder(LeannBackendBuilderInterface):
|
|||||||
index_prefix,
|
index_prefix,
|
||||||
build_kwargs.get("complexity", 64),
|
build_kwargs.get("complexity", 64),
|
||||||
build_kwargs.get("graph_degree", 32),
|
build_kwargs.get("graph_degree", 32),
|
||||||
build_kwargs.get("search_memory_maximum", smart_search_mem),
|
build_kwargs.get("search_memory_maximum", 4.0),
|
||||||
build_kwargs.get("build_memory_maximum", smart_build_mem),
|
build_kwargs.get("build_memory_maximum", 8.0),
|
||||||
build_kwargs.get("num_threads", 8),
|
build_kwargs.get("num_threads", 8),
|
||||||
build_kwargs.get("pq_disk_bytes", 0),
|
build_kwargs.get("pq_disk_bytes", 0),
|
||||||
"",
|
"",
|
||||||
@@ -259,7 +211,7 @@ class DiskannSearcher(BaseSearcher):
|
|||||||
prune_ratio: float = 0.0,
|
prune_ratio: float = 0.0,
|
||||||
recompute_embeddings: bool = False,
|
recompute_embeddings: bool = False,
|
||||||
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
||||||
zmq_port: Optional[int] = None,
|
zmq_port: int | None = None,
|
||||||
batch_recompute: bool = False,
|
batch_recompute: bool = False,
|
||||||
dedup_node_dis: bool = False,
|
dedup_node_dis: bool = False,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
@@ -312,8 +264,6 @@ class DiskannSearcher(BaseSearcher):
|
|||||||
use_global_pruning = True
|
use_global_pruning = True
|
||||||
|
|
||||||
# Perform search with suppressed C++ output based on log level
|
# Perform search with suppressed C++ output based on log level
|
||||||
use_deferred_fetch = kwargs.get("USE_DEFERRED_FETCH", True)
|
|
||||||
recompute_neighors = False
|
|
||||||
with suppress_cpp_output_if_needed():
|
with suppress_cpp_output_if_needed():
|
||||||
labels, distances = self._index.batch_search(
|
labels, distances = self._index.batch_search(
|
||||||
query,
|
query,
|
||||||
@@ -322,9 +272,9 @@ class DiskannSearcher(BaseSearcher):
|
|||||||
complexity,
|
complexity,
|
||||||
beam_width,
|
beam_width,
|
||||||
self.num_threads,
|
self.num_threads,
|
||||||
use_deferred_fetch,
|
kwargs.get("USE_DEFERRED_FETCH", False),
|
||||||
kwargs.get("skip_search_reorder", False),
|
kwargs.get("skip_search_reorder", False),
|
||||||
recompute_neighors,
|
recompute_embeddings,
|
||||||
dedup_node_dis,
|
dedup_node_dis,
|
||||||
prune_ratio,
|
prune_ratio,
|
||||||
batch_recompute,
|
batch_recompute,
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ import sys
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import zmq
|
import zmq
|
||||||
@@ -33,7 +32,7 @@ if not logger.handlers:
|
|||||||
|
|
||||||
|
|
||||||
def create_diskann_embedding_server(
|
def create_diskann_embedding_server(
|
||||||
passages_file: Optional[str] = None,
|
passages_file: str | None = None,
|
||||||
zmq_port: int = 5555,
|
zmq_port: int = 5555,
|
||||||
model_name: str = "sentence-transformers/all-mpnet-base-v2",
|
model_name: str = "sentence-transformers/all-mpnet-base-v2",
|
||||||
embedding_mode: str = "sentence-transformers",
|
embedding_mode: str = "sentence-transformers",
|
||||||
@@ -262,7 +261,7 @@ if __name__ == "__main__":
|
|||||||
"--embedding-mode",
|
"--embedding-mode",
|
||||||
type=str,
|
type=str,
|
||||||
default="sentence-transformers",
|
default="sentence-transformers",
|
||||||
choices=["sentence-transformers", "openai", "mlx", "ollama"],
|
choices=["sentence-transformers", "openai", "mlx"],
|
||||||
help="Embedding backend mode",
|
help="Embedding backend mode",
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-diskann"
|
name = "leann-backend-diskann"
|
||||||
version = "0.2.7"
|
version = "0.1.16"
|
||||||
dependencies = ["leann-core==0.2.7", "numpy", "protobuf>=3.19.0"]
|
dependencies = ["leann-core==0.1.16", "numpy", "protobuf>=3.19.0"]
|
||||||
|
|
||||||
[tool.scikit-build]
|
[tool.scikit-build]
|
||||||
# Key: simplified CMake path
|
# Key: simplified CMake path
|
||||||
@@ -17,5 +17,3 @@ editable.mode = "redirect"
|
|||||||
cmake.build-type = "Release"
|
cmake.build-type = "Release"
|
||||||
build.verbose = true
|
build.verbose = true
|
||||||
build.tool-args = ["-j8"]
|
build.tool-args = ["-j8"]
|
||||||
# Let CMake find packages via Homebrew prefix
|
|
||||||
cmake.define = {CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"}, OpenMP_ROOT = {env = "OpenMP_ROOT"}}
|
|
||||||
|
|||||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: 04048bb302...af2a26481e
@@ -5,20 +5,11 @@ set(CMAKE_CXX_COMPILER_WORKS 1)
|
|||||||
|
|
||||||
# Set OpenMP path for macOS
|
# Set OpenMP path for macOS
|
||||||
if(APPLE)
|
if(APPLE)
|
||||||
# Detect Homebrew installation path (Apple Silicon vs Intel)
|
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include")
|
||||||
if(EXISTS "/opt/homebrew/opt/libomp")
|
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include")
|
||||||
set(HOMEBREW_PREFIX "/opt/homebrew")
|
|
||||||
elseif(EXISTS "/usr/local/opt/libomp")
|
|
||||||
set(HOMEBREW_PREFIX "/usr/local")
|
|
||||||
else()
|
|
||||||
message(FATAL_ERROR "Could not find libomp installation. Please install with: brew install libomp")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
|
|
||||||
set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include")
|
|
||||||
set(OpenMP_C_LIB_NAMES "omp")
|
set(OpenMP_C_LIB_NAMES "omp")
|
||||||
set(OpenMP_CXX_LIB_NAMES "omp")
|
set(OpenMP_CXX_LIB_NAMES "omp")
|
||||||
set(OpenMP_omp_LIBRARY "${HOMEBREW_PREFIX}/opt/libomp/lib/libomp.dylib")
|
set(OpenMP_omp_LIBRARY "/opt/homebrew/opt/libomp/lib/libomp.dylib")
|
||||||
|
|
||||||
# Force use of system libc++ to avoid version mismatch
|
# Force use of system libc++ to avoid version mismatch
|
||||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from leann.interface import (
|
from leann.interface import (
|
||||||
@@ -152,7 +152,7 @@ class HNSWSearcher(BaseSearcher):
|
|||||||
self,
|
self,
|
||||||
query: np.ndarray,
|
query: np.ndarray,
|
||||||
top_k: int,
|
top_k: int,
|
||||||
zmq_port: Optional[int] = None,
|
zmq_port: int | None = None,
|
||||||
complexity: int = 64,
|
complexity: int = 64,
|
||||||
beam_width: int = 1,
|
beam_width: int = 1,
|
||||||
prune_ratio: float = 0.0,
|
prune_ratio: float = 0.0,
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ import sys
|
|||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
import msgpack
|
import msgpack
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@@ -34,7 +33,7 @@ if not logger.handlers:
|
|||||||
|
|
||||||
|
|
||||||
def create_hnsw_embedding_server(
|
def create_hnsw_embedding_server(
|
||||||
passages_file: Union[str, None] = None,
|
passages_file: str | None = None,
|
||||||
zmq_port: int = 5555,
|
zmq_port: int = 5555,
|
||||||
model_name: str = "sentence-transformers/all-mpnet-base-v2",
|
model_name: str = "sentence-transformers/all-mpnet-base-v2",
|
||||||
distance_metric: str = "mips",
|
distance_metric: str = "mips",
|
||||||
@@ -296,7 +295,7 @@ if __name__ == "__main__":
|
|||||||
"--embedding-mode",
|
"--embedding-mode",
|
||||||
type=str,
|
type=str,
|
||||||
default="sentence-transformers",
|
default="sentence-transformers",
|
||||||
choices=["sentence-transformers", "openai", "mlx", "ollama"],
|
choices=["sentence-transformers", "openai", "mlx"],
|
||||||
help="Embedding backend mode",
|
help="Embedding backend mode",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-hnsw"
|
name = "leann-backend-hnsw"
|
||||||
version = "0.2.7"
|
version = "0.1.16"
|
||||||
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"leann-core==0.2.7",
|
"leann-core==0.1.16",
|
||||||
"numpy",
|
"numpy",
|
||||||
"pyzmq>=23.0.0",
|
"pyzmq>=23.0.0",
|
||||||
"msgpack>=1.0.0",
|
"msgpack>=1.0.0",
|
||||||
@@ -22,8 +22,6 @@ cmake.build-type = "Release"
|
|||||||
build.verbose = true
|
build.verbose = true
|
||||||
build.tool-args = ["-j8"]
|
build.tool-args = ["-j8"]
|
||||||
|
|
||||||
# CMake definitions to optimize compilation and find Homebrew packages
|
# CMake definitions to optimize compilation
|
||||||
[tool.scikit-build.cmake.define]
|
[tool.scikit-build.cmake.define]
|
||||||
CMAKE_BUILD_PARALLEL_LEVEL = "8"
|
CMAKE_BUILD_PARALLEL_LEVEL = "8"
|
||||||
CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"}
|
|
||||||
OpenMP_ROOT = {env = "OpenMP_ROOT"}
|
|
||||||
|
|||||||
Submodule packages/leann-backend-hnsw/third_party/faiss updated: 4a2c0d67d3...ff22e2c86b
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-core"
|
name = "leann-core"
|
||||||
version = "0.2.7"
|
version = "0.1.16"
|
||||||
description = "Core API and plugin system for LEANN"
|
description = "Core API and plugin system for LEANN"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
@@ -31,10 +31,8 @@ dependencies = [
|
|||||||
"PyPDF2>=3.0.0",
|
"PyPDF2>=3.0.0",
|
||||||
"pymupdf>=1.23.0",
|
"pymupdf>=1.23.0",
|
||||||
"pdfplumber>=0.10.0",
|
"pdfplumber>=0.10.0",
|
||||||
"nbconvert>=7.0.0", # For .ipynb file support
|
"mlx>=0.26.3; sys_platform == 'darwin'",
|
||||||
"gitignore-parser>=0.1.12", # For proper .gitignore handling
|
"mlx-lm>=0.26.0; sys_platform == 'darwin'",
|
||||||
"mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
||||||
"mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
@@ -46,7 +44,6 @@ colab = [
|
|||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
leann = "leann.cli:main"
|
leann = "leann.cli:main"
|
||||||
leann_mcp = "leann.mcp:main"
|
|
||||||
|
|
||||||
[tool.setuptools.packages.find]
|
[tool.setuptools.packages.find]
|
||||||
where = ["src"]
|
where = ["src"]
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import time
|
|||||||
import warnings
|
import warnings
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ def compute_embeddings(
|
|||||||
model_name: str,
|
model_name: str,
|
||||||
mode: str = "sentence-transformers",
|
mode: str = "sentence-transformers",
|
||||||
use_server: bool = True,
|
use_server: bool = True,
|
||||||
port: Optional[int] = None,
|
port: int | None = None,
|
||||||
is_build=False,
|
is_build=False,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
@@ -157,12 +157,12 @@ class LeannBuilder:
|
|||||||
self,
|
self,
|
||||||
backend_name: str,
|
backend_name: str,
|
||||||
embedding_model: str = "facebook/contriever",
|
embedding_model: str = "facebook/contriever",
|
||||||
dimensions: Optional[int] = None,
|
dimensions: int | None = None,
|
||||||
embedding_mode: str = "sentence-transformers",
|
embedding_mode: str = "sentence-transformers",
|
||||||
**backend_kwargs,
|
**backend_kwargs,
|
||||||
):
|
):
|
||||||
self.backend_name = backend_name
|
self.backend_name = backend_name
|
||||||
backend_factory: Optional[LeannBackendFactoryInterface] = BACKEND_REGISTRY.get(backend_name)
|
backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(backend_name)
|
||||||
if backend_factory is None:
|
if backend_factory is None:
|
||||||
raise ValueError(f"Backend '{backend_name}' not found or not registered.")
|
raise ValueError(f"Backend '{backend_name}' not found or not registered.")
|
||||||
self.backend_factory = backend_factory
|
self.backend_factory = backend_factory
|
||||||
@@ -242,7 +242,7 @@ class LeannBuilder:
|
|||||||
self.backend_kwargs = backend_kwargs
|
self.backend_kwargs = backend_kwargs
|
||||||
self.chunks: list[dict[str, Any]] = []
|
self.chunks: list[dict[str, Any]] = []
|
||||||
|
|
||||||
def add_text(self, text: str, metadata: Optional[dict[str, Any]] = None):
|
def add_text(self, text: str, metadata: dict[str, Any] | None = None):
|
||||||
if metadata is None:
|
if metadata is None:
|
||||||
metadata = {}
|
metadata = {}
|
||||||
passage_id = metadata.get("id", str(len(self.chunks)))
|
passage_id = metadata.get("id", str(len(self.chunks)))
|
||||||
@@ -554,7 +554,7 @@ class LeannSearcher:
|
|||||||
if "labels" in results and "distances" in results:
|
if "labels" in results and "distances" in results:
|
||||||
logger.info(f" Processing {len(results['labels'][0])} passage IDs:")
|
logger.info(f" Processing {len(results['labels'][0])} passage IDs:")
|
||||||
for i, (string_id, dist) in enumerate(
|
for i, (string_id, dist) in enumerate(
|
||||||
zip(results["labels"][0], results["distances"][0])
|
zip(results["labels"][0], results["distances"][0], strict=False)
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
passage_data = self.passage_manager.get_passage(string_id)
|
passage_data = self.passage_manager.get_passage(string_id)
|
||||||
@@ -592,7 +592,7 @@ class LeannChat:
|
|||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
index_path: str,
|
index_path: str,
|
||||||
llm_config: Optional[dict[str, Any]] = None,
|
llm_config: dict[str, Any] | None = None,
|
||||||
enable_warmup: bool = False,
|
enable_warmup: bool = False,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
):
|
):
|
||||||
@@ -608,7 +608,7 @@ class LeannChat:
|
|||||||
prune_ratio: float = 0.0,
|
prune_ratio: float = 0.0,
|
||||||
recompute_embeddings: bool = True,
|
recompute_embeddings: bool = True,
|
||||||
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
||||||
llm_kwargs: Optional[dict[str, Any]] = None,
|
llm_kwargs: dict[str, Any] | None = None,
|
||||||
expected_zmq_port: int = 5557,
|
expected_zmq_port: int = 5557,
|
||||||
**search_kwargs,
|
**search_kwargs,
|
||||||
):
|
):
|
||||||
@@ -636,10 +636,7 @@ class LeannChat:
|
|||||||
"Please provide the best answer you can based on this context and your knowledge."
|
"Please provide the best answer you can based on this context and your knowledge."
|
||||||
)
|
)
|
||||||
|
|
||||||
ask_time = time.time()
|
|
||||||
ans = self.llm.ask(prompt, **llm_kwargs)
|
ans = self.llm.ask(prompt, **llm_kwargs)
|
||||||
ask_time = time.time() - ask_time
|
|
||||||
logger.info(f" Ask time: {ask_time} seconds")
|
|
||||||
return ans
|
return ans
|
||||||
|
|
||||||
def start_interactive(self):
|
def start_interactive(self):
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import difflib
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Optional
|
from typing import Any
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@@ -17,12 +17,12 @@ logging.basicConfig(level=logging.INFO)
|
|||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def check_ollama_models(host: str) -> list[str]:
|
def check_ollama_models() -> list[str]:
|
||||||
"""Check available Ollama models and return a list"""
|
"""Check available Ollama models and return a list"""
|
||||||
try:
|
try:
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
response = requests.get(f"{host}/api/tags", timeout=5)
|
response = requests.get("http://localhost:11434/api/tags", timeout=5)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
data = response.json()
|
data = response.json()
|
||||||
return [model["name"] for model in data.get("models", [])]
|
return [model["name"] for model in data.get("models", [])]
|
||||||
@@ -309,12 +309,10 @@ def search_hf_models(query: str, limit: int = 10) -> list[str]:
|
|||||||
return search_hf_models_fuzzy(query, limit)
|
return search_hf_models_fuzzy(query, limit)
|
||||||
|
|
||||||
|
|
||||||
def validate_model_and_suggest(
|
def validate_model_and_suggest(model_name: str, llm_type: str) -> str | None:
|
||||||
model_name: str, llm_type: str, host: str = "http://localhost:11434"
|
|
||||||
) -> Optional[str]:
|
|
||||||
"""Validate model name and provide suggestions if invalid"""
|
"""Validate model name and provide suggestions if invalid"""
|
||||||
if llm_type == "ollama":
|
if llm_type == "ollama":
|
||||||
available_models = check_ollama_models(host)
|
available_models = check_ollama_models()
|
||||||
if available_models and model_name not in available_models:
|
if available_models and model_name not in available_models:
|
||||||
error_msg = f"Model '{model_name}' not found in your local Ollama installation."
|
error_msg = f"Model '{model_name}' not found in your local Ollama installation."
|
||||||
|
|
||||||
@@ -360,11 +358,7 @@ def validate_model_and_suggest(
|
|||||||
error_msg += f"\n\nModel '{model_name}' was not found in Ollama's library."
|
error_msg += f"\n\nModel '{model_name}' was not found in Ollama's library."
|
||||||
|
|
||||||
if suggestions:
|
if suggestions:
|
||||||
error_msg += (
|
error_msg += "\n\nDid you mean one of these installed models?\n"
|
||||||
"\n\nDid you mean one of these installed models?\n"
|
|
||||||
+ "\nTry to use ollama pull to install the model you need\n"
|
|
||||||
)
|
|
||||||
|
|
||||||
for i, suggestion in enumerate(suggestions, 1):
|
for i, suggestion in enumerate(suggestions, 1):
|
||||||
error_msg += f" {i}. {suggestion}\n"
|
error_msg += f" {i}. {suggestion}\n"
|
||||||
else:
|
else:
|
||||||
@@ -471,7 +465,7 @@ class OllamaChat(LLMInterface):
|
|||||||
requests.get(host)
|
requests.get(host)
|
||||||
|
|
||||||
# Pre-check model availability with helpful suggestions
|
# Pre-check model availability with helpful suggestions
|
||||||
model_error = validate_model_and_suggest(model, "ollama", host)
|
model_error = validate_model_and_suggest(model, "ollama")
|
||||||
if model_error:
|
if model_error:
|
||||||
raise ValueError(model_error)
|
raise ValueError(model_error)
|
||||||
|
|
||||||
@@ -491,35 +485,11 @@ class OllamaChat(LLMInterface):
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
full_url = f"{self.host}/api/generate"
|
full_url = f"{self.host}/api/generate"
|
||||||
|
|
||||||
# Handle thinking budget for reasoning models
|
|
||||||
options = kwargs.copy()
|
|
||||||
thinking_budget = kwargs.get("thinking_budget")
|
|
||||||
if thinking_budget:
|
|
||||||
# Remove thinking_budget from options as it's not a standard Ollama option
|
|
||||||
options.pop("thinking_budget", None)
|
|
||||||
# Only apply reasoning parameters to models that support it
|
|
||||||
reasoning_supported_models = [
|
|
||||||
"gpt-oss:20b",
|
|
||||||
"gpt-oss:120b",
|
|
||||||
"deepseek-r1",
|
|
||||||
"deepseek-coder",
|
|
||||||
]
|
|
||||||
|
|
||||||
if thinking_budget in ["low", "medium", "high"]:
|
|
||||||
if any(model in self.model.lower() for model in reasoning_supported_models):
|
|
||||||
options["reasoning"] = {"effort": thinking_budget, "exclude": False}
|
|
||||||
logger.info(f"Applied reasoning effort={thinking_budget} to model {self.model}")
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
|
|
||||||
)
|
|
||||||
|
|
||||||
payload = {
|
payload = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"prompt": prompt,
|
"prompt": prompt,
|
||||||
"stream": False, # Keep it simple for now
|
"stream": False, # Keep it simple for now
|
||||||
"options": options,
|
"options": kwargs,
|
||||||
}
|
}
|
||||||
logger.debug(f"Sending request to Ollama: {payload}")
|
logger.debug(f"Sending request to Ollama: {payload}")
|
||||||
try:
|
try:
|
||||||
@@ -572,41 +542,14 @@ class HFChat(LLMInterface):
|
|||||||
self.device = "cpu"
|
self.device = "cpu"
|
||||||
logger.info("No GPU detected. Using CPU.")
|
logger.info("No GPU detected. Using CPU.")
|
||||||
|
|
||||||
# Load tokenizer and model with timeout protection
|
# Load tokenizer and model
|
||||||
try:
|
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
||||||
import signal
|
self.model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
model_name,
|
||||||
def timeout_handler(signum, frame):
|
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
|
||||||
raise TimeoutError("Model download/loading timed out")
|
device_map="auto" if self.device != "cpu" else None,
|
||||||
|
trust_remote_code=True,
|
||||||
# Set timeout for model loading (60 seconds)
|
)
|
||||||
old_handler = signal.signal(signal.SIGALRM, timeout_handler)
|
|
||||||
signal.alarm(60)
|
|
||||||
|
|
||||||
try:
|
|
||||||
logger.info(f"Loading tokenizer for {model_name}...")
|
|
||||||
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
||||||
|
|
||||||
logger.info(f"Loading model {model_name}...")
|
|
||||||
self.model = AutoModelForCausalLM.from_pretrained(
|
|
||||||
model_name,
|
|
||||||
torch_dtype=torch.float16 if self.device != "cpu" else torch.float32,
|
|
||||||
device_map="auto" if self.device != "cpu" else None,
|
|
||||||
trust_remote_code=True,
|
|
||||||
)
|
|
||||||
logger.info(f"Successfully loaded {model_name}")
|
|
||||||
finally:
|
|
||||||
signal.alarm(0) # Cancel the alarm
|
|
||||||
signal.signal(signal.SIGALRM, old_handler) # Restore old handler
|
|
||||||
|
|
||||||
except TimeoutError:
|
|
||||||
logger.error(f"Model loading timed out for {model_name}")
|
|
||||||
raise RuntimeError(
|
|
||||||
f"Model loading timed out for {model_name}. Please check your internet connection or try a smaller model."
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Failed to load model {model_name}: {e}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# Move model to device if not using device_map
|
# Move model to device if not using device_map
|
||||||
if self.device != "cpu" and "device_map" not in str(self.model):
|
if self.device != "cpu" and "device_map" not in str(self.model):
|
||||||
@@ -685,7 +628,7 @@ class HFChat(LLMInterface):
|
|||||||
class OpenAIChat(LLMInterface):
|
class OpenAIChat(LLMInterface):
|
||||||
"""LLM interface for OpenAI models."""
|
"""LLM interface for OpenAI models."""
|
||||||
|
|
||||||
def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
|
def __init__(self, model: str = "gpt-4o", api_key: str | None = None):
|
||||||
self.model = model
|
self.model = model
|
||||||
self.api_key = api_key or os.getenv("OPENAI_API_KEY")
|
self.api_key = api_key or os.getenv("OPENAI_API_KEY")
|
||||||
|
|
||||||
@@ -710,38 +653,11 @@ class OpenAIChat(LLMInterface):
|
|||||||
params = {
|
params = {
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
|
"max_tokens": kwargs.get("max_tokens", 1000),
|
||||||
"temperature": kwargs.get("temperature", 0.7),
|
"temperature": kwargs.get("temperature", 0.7),
|
||||||
|
**{k: v for k, v in kwargs.items() if k not in ["max_tokens", "temperature"]},
|
||||||
}
|
}
|
||||||
|
|
||||||
# Handle max_tokens vs max_completion_tokens based on model
|
|
||||||
max_tokens = kwargs.get("max_tokens", 1000)
|
|
||||||
if "o3" in self.model or "o4" in self.model or "o1" in self.model:
|
|
||||||
# o-series models use max_completion_tokens
|
|
||||||
params["max_completion_tokens"] = max_tokens
|
|
||||||
params["temperature"] = 1.0
|
|
||||||
else:
|
|
||||||
# Other models use max_tokens
|
|
||||||
params["max_tokens"] = max_tokens
|
|
||||||
|
|
||||||
# Handle thinking budget for reasoning models
|
|
||||||
thinking_budget = kwargs.get("thinking_budget")
|
|
||||||
if thinking_budget and thinking_budget in ["low", "medium", "high"]:
|
|
||||||
# Check if this is an o-series model (partial match for model names)
|
|
||||||
o_series_models = ["o3", "o3-mini", "o4-mini", "o1", "o3-pro", "o3-deep-research"]
|
|
||||||
if any(model in self.model for model in o_series_models):
|
|
||||||
# Use the correct OpenAI reasoning parameter format
|
|
||||||
params["reasoning_effort"] = thinking_budget
|
|
||||||
logger.info(f"Applied reasoning_effort={thinking_budget} to model {self.model}")
|
|
||||||
else:
|
|
||||||
logger.warning(
|
|
||||||
f"Thinking budget '{thinking_budget}' requested but model '{self.model}' may not support reasoning parameters. Proceeding without reasoning."
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add other kwargs (excluding thinking_budget as it's handled above)
|
|
||||||
for k, v in kwargs.items():
|
|
||||||
if k not in ["max_tokens", "temperature", "thinking_budget"]:
|
|
||||||
params[k] = v
|
|
||||||
|
|
||||||
logger.info(f"Sending request to OpenAI with model {self.model}")
|
logger.info(f"Sending request to OpenAI with model {self.model}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -761,7 +677,7 @@ class SimulatedChat(LLMInterface):
|
|||||||
return "This is a simulated answer from the LLM based on the retrieved context."
|
return "This is a simulated answer from the LLM based on the retrieved context."
|
||||||
|
|
||||||
|
|
||||||
def get_llm(llm_config: Optional[dict[str, Any]] = None) -> LLMInterface:
|
def get_llm(llm_config: dict[str, Any] | None = None) -> LLMInterface:
|
||||||
"""
|
"""
|
||||||
Factory function to get an LLM interface based on configuration.
|
Factory function to get an LLM interface based on configuration.
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +1,9 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import asyncio
|
import asyncio
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
from llama_index.core import SimpleDirectoryReader
|
from llama_index.core import SimpleDirectoryReader
|
||||||
from llama_index.core.node_parser import SentenceSplitter
|
from llama_index.core.node_parser import SentenceSplitter
|
||||||
from tqdm import tqdm
|
|
||||||
|
|
||||||
from .api import LeannBuilder, LeannChat, LeannSearcher
|
from .api import LeannBuilder, LeannChat, LeannSearcher
|
||||||
|
|
||||||
@@ -43,23 +41,13 @@ def extract_pdf_text_with_pdfplumber(file_path: str) -> str:
|
|||||||
|
|
||||||
class LeannCLI:
|
class LeannCLI:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
# Always use project-local .leann directory (like .git)
|
self.indexes_dir = Path.home() / ".leann" / "indexes"
|
||||||
self.indexes_dir = Path.cwd() / ".leann" / "indexes"
|
|
||||||
self.indexes_dir.mkdir(parents=True, exist_ok=True)
|
self.indexes_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
# Default parser for documents
|
|
||||||
self.node_parser = SentenceSplitter(
|
self.node_parser = SentenceSplitter(
|
||||||
chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
|
chunk_size=256, chunk_overlap=128, separator=" ", paragraph_separator="\n\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Code-optimized parser
|
|
||||||
self.code_parser = SentenceSplitter(
|
|
||||||
chunk_size=512, # Larger chunks for code context
|
|
||||||
chunk_overlap=50, # Less overlap to preserve function boundaries
|
|
||||||
separator="\n", # Split by lines for code
|
|
||||||
paragraph_separator="\n\n", # Preserve logical code blocks
|
|
||||||
)
|
|
||||||
|
|
||||||
def get_index_path(self, index_name: str) -> str:
|
def get_index_path(self, index_name: str) -> str:
|
||||||
index_dir = self.indexes_dir / index_name
|
index_dir = self.indexes_dir / index_name
|
||||||
return str(index_dir / "documents.leann")
|
return str(index_dir / "documents.leann")
|
||||||
@@ -76,14 +64,10 @@ class LeannCLI:
|
|||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
epilog="""
|
epilog="""
|
||||||
Examples:
|
Examples:
|
||||||
leann build my-docs --docs ./documents # Build index from directory
|
leann build my-docs --docs ./documents # Build index named my-docs
|
||||||
leann build my-code --docs ./src ./tests ./config # Build index from multiple directories
|
leann search my-docs "query" # Search in my-docs index
|
||||||
leann build my-files --docs ./file1.py ./file2.txt ./docs/ # Build index from files and directories
|
leann ask my-docs "question" # Ask my-docs index
|
||||||
leann build my-mixed --docs ./readme.md ./src/ ./config.json # Build index from mixed files/dirs
|
leann list # List all stored indexes
|
||||||
leann build my-ppts --docs ./ --file-types .pptx,.pdf # Index only PowerPoint and PDF files
|
|
||||||
leann search my-docs "query" # Search in my-docs index
|
|
||||||
leann ask my-docs "question" # Ask my-docs index
|
|
||||||
leann list # List all stored indexes
|
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -91,38 +75,18 @@ Examples:
|
|||||||
|
|
||||||
# Build command
|
# Build command
|
||||||
build_parser = subparsers.add_parser("build", help="Build document index")
|
build_parser = subparsers.add_parser("build", help="Build document index")
|
||||||
build_parser.add_argument(
|
build_parser.add_argument("index_name", help="Index name")
|
||||||
"index_name", nargs="?", help="Index name (default: current directory name)"
|
build_parser.add_argument("--docs", type=str, required=True, help="Documents directory")
|
||||||
)
|
|
||||||
build_parser.add_argument(
|
|
||||||
"--docs",
|
|
||||||
type=str,
|
|
||||||
nargs="+",
|
|
||||||
default=["."],
|
|
||||||
help="Documents directories and/or files (default: current directory)",
|
|
||||||
)
|
|
||||||
build_parser.add_argument(
|
build_parser.add_argument(
|
||||||
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
|
"--backend", type=str, default="hnsw", choices=["hnsw", "diskann"]
|
||||||
)
|
)
|
||||||
build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
|
build_parser.add_argument("--embedding-model", type=str, default="facebook/contriever")
|
||||||
build_parser.add_argument(
|
|
||||||
"--embedding-mode",
|
|
||||||
type=str,
|
|
||||||
default="sentence-transformers",
|
|
||||||
choices=["sentence-transformers", "openai", "mlx", "ollama"],
|
|
||||||
help="Embedding backend mode (default: sentence-transformers)",
|
|
||||||
)
|
|
||||||
build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
|
build_parser.add_argument("--force", "-f", action="store_true", help="Force rebuild")
|
||||||
build_parser.add_argument("--graph-degree", type=int, default=32)
|
build_parser.add_argument("--graph-degree", type=int, default=32)
|
||||||
build_parser.add_argument("--complexity", type=int, default=64)
|
build_parser.add_argument("--complexity", type=int, default=64)
|
||||||
build_parser.add_argument("--num-threads", type=int, default=1)
|
build_parser.add_argument("--num-threads", type=int, default=1)
|
||||||
build_parser.add_argument("--compact", action="store_true", default=True)
|
build_parser.add_argument("--compact", action="store_true", default=True)
|
||||||
build_parser.add_argument("--recompute", action="store_true", default=True)
|
build_parser.add_argument("--recompute", action="store_true", default=True)
|
||||||
build_parser.add_argument(
|
|
||||||
"--file-types",
|
|
||||||
type=str,
|
|
||||||
help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Search command
|
# Search command
|
||||||
search_parser = subparsers.add_parser("search", help="Search documents")
|
search_parser = subparsers.add_parser("search", help="Search documents")
|
||||||
@@ -132,12 +96,7 @@ Examples:
|
|||||||
search_parser.add_argument("--complexity", type=int, default=64)
|
search_parser.add_argument("--complexity", type=int, default=64)
|
||||||
search_parser.add_argument("--beam-width", type=int, default=1)
|
search_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
search_parser.add_argument(
|
search_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
"--recompute-embeddings",
|
|
||||||
action="store_true",
|
|
||||||
default=True,
|
|
||||||
help="Recompute embeddings (default: True)",
|
|
||||||
)
|
|
||||||
search_parser.add_argument(
|
search_parser.add_argument(
|
||||||
"--pruning-strategy",
|
"--pruning-strategy",
|
||||||
choices=["global", "local", "proportional"],
|
choices=["global", "local", "proportional"],
|
||||||
@@ -160,497 +119,94 @@ Examples:
|
|||||||
ask_parser.add_argument("--complexity", type=int, default=32)
|
ask_parser.add_argument("--complexity", type=int, default=32)
|
||||||
ask_parser.add_argument("--beam-width", type=int, default=1)
|
ask_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
ask_parser.add_argument(
|
ask_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
"--recompute-embeddings",
|
|
||||||
action="store_true",
|
|
||||||
default=True,
|
|
||||||
help="Recompute embeddings (default: True)",
|
|
||||||
)
|
|
||||||
ask_parser.add_argument(
|
ask_parser.add_argument(
|
||||||
"--pruning-strategy",
|
"--pruning-strategy",
|
||||||
choices=["global", "local", "proportional"],
|
choices=["global", "local", "proportional"],
|
||||||
default="global",
|
default="global",
|
||||||
)
|
)
|
||||||
ask_parser.add_argument(
|
|
||||||
"--thinking-budget",
|
|
||||||
type=str,
|
|
||||||
choices=["low", "medium", "high"],
|
|
||||||
default=None,
|
|
||||||
help="Thinking budget for reasoning models (low/medium/high). Supported by GPT-Oss:20b and other reasoning models.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# List command
|
# List command
|
||||||
subparsers.add_parser("list", help="List all indexes")
|
subparsers.add_parser("list", help="List all indexes")
|
||||||
|
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
def register_project_dir(self):
|
|
||||||
"""Register current project directory in global registry"""
|
|
||||||
global_registry = Path.home() / ".leann" / "projects.json"
|
|
||||||
global_registry.parent.mkdir(exist_ok=True)
|
|
||||||
|
|
||||||
current_dir = str(Path.cwd())
|
|
||||||
|
|
||||||
# Load existing registry
|
|
||||||
projects = []
|
|
||||||
if global_registry.exists():
|
|
||||||
try:
|
|
||||||
import json
|
|
||||||
|
|
||||||
with open(global_registry) as f:
|
|
||||||
projects = json.load(f)
|
|
||||||
except Exception:
|
|
||||||
projects = []
|
|
||||||
|
|
||||||
# Add current directory if not already present
|
|
||||||
if current_dir not in projects:
|
|
||||||
projects.append(current_dir)
|
|
||||||
|
|
||||||
# Save registry
|
|
||||||
import json
|
|
||||||
|
|
||||||
with open(global_registry, "w") as f:
|
|
||||||
json.dump(projects, f, indent=2)
|
|
||||||
|
|
||||||
def _build_gitignore_parser(self, docs_dir: str):
|
|
||||||
"""Build gitignore parser using gitignore-parser library."""
|
|
||||||
from gitignore_parser import parse_gitignore
|
|
||||||
|
|
||||||
# Try to parse the root .gitignore
|
|
||||||
gitignore_path = Path(docs_dir) / ".gitignore"
|
|
||||||
|
|
||||||
if gitignore_path.exists():
|
|
||||||
try:
|
|
||||||
# gitignore-parser automatically handles all subdirectory .gitignore files!
|
|
||||||
matches = parse_gitignore(str(gitignore_path))
|
|
||||||
print(f"📋 Loaded .gitignore from {docs_dir} (includes all subdirectories)")
|
|
||||||
return matches
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Warning: Could not parse .gitignore: {e}")
|
|
||||||
else:
|
|
||||||
print("📋 No .gitignore found")
|
|
||||||
|
|
||||||
# Fallback: basic pattern matching for essential files
|
|
||||||
essential_patterns = {".git", ".DS_Store", "__pycache__", "node_modules", ".venv", "venv"}
|
|
||||||
|
|
||||||
def basic_matches(file_path):
|
|
||||||
path_parts = Path(file_path).parts
|
|
||||||
return any(part in essential_patterns for part in path_parts)
|
|
||||||
|
|
||||||
return basic_matches
|
|
||||||
|
|
||||||
def _should_exclude_file(self, relative_path: Path, gitignore_matches) -> bool:
|
|
||||||
"""Check if a file should be excluded using gitignore parser."""
|
|
||||||
return gitignore_matches(str(relative_path))
|
|
||||||
|
|
||||||
def _is_git_submodule(self, path: Path) -> bool:
|
|
||||||
"""Check if a path is a git submodule."""
|
|
||||||
try:
|
|
||||||
# Find the git repo root
|
|
||||||
current_dir = Path.cwd()
|
|
||||||
while current_dir != current_dir.parent:
|
|
||||||
if (current_dir / ".git").exists():
|
|
||||||
gitmodules_path = current_dir / ".gitmodules"
|
|
||||||
if gitmodules_path.exists():
|
|
||||||
# Read .gitmodules to check if this path is a submodule
|
|
||||||
gitmodules_content = gitmodules_path.read_text()
|
|
||||||
# Convert path to relative to git root
|
|
||||||
try:
|
|
||||||
relative_path = path.resolve().relative_to(current_dir)
|
|
||||||
# Check if this path appears in .gitmodules
|
|
||||||
return f"path = {relative_path}" in gitmodules_content
|
|
||||||
except ValueError:
|
|
||||||
# Path is not under git root
|
|
||||||
return False
|
|
||||||
break
|
|
||||||
current_dir = current_dir.parent
|
|
||||||
return False
|
|
||||||
except Exception:
|
|
||||||
# If anything goes wrong, assume it's not a submodule
|
|
||||||
return False
|
|
||||||
|
|
||||||
def list_indexes(self):
|
def list_indexes(self):
|
||||||
print("Stored LEANN indexes:")
|
print("Stored LEANN indexes:")
|
||||||
|
|
||||||
# Get all project directories with .leann
|
if not self.indexes_dir.exists():
|
||||||
global_registry = Path.home() / ".leann" / "projects.json"
|
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
|
||||||
all_projects = []
|
|
||||||
|
|
||||||
if global_registry.exists():
|
|
||||||
try:
|
|
||||||
import json
|
|
||||||
|
|
||||||
with open(global_registry) as f:
|
|
||||||
all_projects = json.load(f)
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Filter to only existing directories with .leann
|
|
||||||
valid_projects = []
|
|
||||||
for project_dir in all_projects:
|
|
||||||
project_path = Path(project_dir)
|
|
||||||
if project_path.exists() and (project_path / ".leann" / "indexes").exists():
|
|
||||||
valid_projects.append(project_path)
|
|
||||||
|
|
||||||
# Add current project if it has .leann but not in registry
|
|
||||||
current_path = Path.cwd()
|
|
||||||
if (current_path / ".leann" / "indexes").exists() and current_path not in valid_projects:
|
|
||||||
valid_projects.append(current_path)
|
|
||||||
|
|
||||||
if not valid_projects:
|
|
||||||
print(
|
|
||||||
"No indexes found. Use 'leann build <name> --docs <dir> [<dir2> ...]' to create one."
|
|
||||||
)
|
|
||||||
return
|
return
|
||||||
|
|
||||||
total_indexes = 0
|
index_dirs = [d for d in self.indexes_dir.iterdir() if d.is_dir()]
|
||||||
current_dir = Path.cwd()
|
|
||||||
|
|
||||||
for project_path in valid_projects:
|
if not index_dirs:
|
||||||
indexes_dir = project_path / ".leann" / "indexes"
|
print("No indexes found. Use 'leann build <name> --docs <dir>' to create one.")
|
||||||
if not indexes_dir.exists():
|
return
|
||||||
continue
|
|
||||||
|
|
||||||
index_dirs = [d for d in indexes_dir.iterdir() if d.is_dir()]
|
print(f"Found {len(index_dirs)} indexes:")
|
||||||
if not index_dirs:
|
for i, index_dir in enumerate(index_dirs, 1):
|
||||||
continue
|
index_name = index_dir.name
|
||||||
|
status = "✓" if self.index_exists(index_name) else "✗"
|
||||||
|
|
||||||
# Show project header
|
print(f" {i}. {index_name} [{status}]")
|
||||||
if project_path == current_dir:
|
if self.index_exists(index_name):
|
||||||
print(f"\n📁 Current project ({project_path}):")
|
index_dir / "documents.leann.meta.json"
|
||||||
|
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
|
||||||
|
1024 * 1024
|
||||||
|
)
|
||||||
|
print(f" Size: {size_mb:.1f} MB")
|
||||||
|
|
||||||
|
if index_dirs:
|
||||||
|
example_name = index_dirs[0].name
|
||||||
|
print("\nUsage:")
|
||||||
|
print(f' leann search {example_name} "your query"')
|
||||||
|
print(f" leann ask {example_name} --interactive")
|
||||||
|
|
||||||
|
def load_documents(self, docs_dir: str):
|
||||||
|
print(f"Loading documents from {docs_dir}...")
|
||||||
|
|
||||||
|
# Try to use better PDF parsers first
|
||||||
|
documents = []
|
||||||
|
docs_path = Path(docs_dir)
|
||||||
|
|
||||||
|
for file_path in docs_path.rglob("*.pdf"):
|
||||||
|
print(f"Processing PDF: {file_path}")
|
||||||
|
|
||||||
|
# Try PyMuPDF first (best quality)
|
||||||
|
text = extract_pdf_text_with_pymupdf(str(file_path))
|
||||||
|
if text is None:
|
||||||
|
# Try pdfplumber
|
||||||
|
text = extract_pdf_text_with_pdfplumber(str(file_path))
|
||||||
|
|
||||||
|
if text:
|
||||||
|
# Create a simple document structure
|
||||||
|
from llama_index.core import Document
|
||||||
|
|
||||||
|
doc = Document(text=text, metadata={"source": str(file_path)})
|
||||||
|
documents.append(doc)
|
||||||
else:
|
else:
|
||||||
print(f"\n📂 {project_path}:")
|
# Fallback to default reader
|
||||||
|
print(f"Using default reader for {file_path}")
|
||||||
for index_dir in index_dirs:
|
default_docs = SimpleDirectoryReader(
|
||||||
total_indexes += 1
|
str(file_path.parent),
|
||||||
index_name = index_dir.name
|
|
||||||
meta_file = index_dir / "documents.leann.meta.json"
|
|
||||||
status = "✓" if meta_file.exists() else "✗"
|
|
||||||
|
|
||||||
print(f" {total_indexes}. {index_name} [{status}]")
|
|
||||||
if status == "✓":
|
|
||||||
size_mb = sum(f.stat().st_size for f in index_dir.iterdir() if f.is_file()) / (
|
|
||||||
1024 * 1024
|
|
||||||
)
|
|
||||||
print(f" Size: {size_mb:.1f} MB")
|
|
||||||
|
|
||||||
if total_indexes > 0:
|
|
||||||
print(f"\nTotal: {total_indexes} indexes across {len(valid_projects)} projects")
|
|
||||||
print("\nUsage (current project only):")
|
|
||||||
|
|
||||||
# Show example from current project
|
|
||||||
current_indexes_dir = current_dir / ".leann" / "indexes"
|
|
||||||
if current_indexes_dir.exists():
|
|
||||||
current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
|
|
||||||
if current_index_dirs:
|
|
||||||
example_name = current_index_dirs[0].name
|
|
||||||
print(f' leann search {example_name} "your query"')
|
|
||||||
print(f" leann ask {example_name} --interactive")
|
|
||||||
|
|
||||||
def load_documents(
|
|
||||||
self, docs_paths: Union[str, list], custom_file_types: Union[str, None] = None
|
|
||||||
):
|
|
||||||
# Handle both single path (string) and multiple paths (list) for backward compatibility
|
|
||||||
if isinstance(docs_paths, str):
|
|
||||||
docs_paths = [docs_paths]
|
|
||||||
|
|
||||||
# Separate files and directories
|
|
||||||
files = []
|
|
||||||
directories = []
|
|
||||||
for path in docs_paths:
|
|
||||||
path_obj = Path(path)
|
|
||||||
if path_obj.is_file():
|
|
||||||
files.append(str(path_obj))
|
|
||||||
elif path_obj.is_dir():
|
|
||||||
# Check if this is a git submodule - if so, skip it
|
|
||||||
if self._is_git_submodule(path_obj):
|
|
||||||
print(f"⚠️ Skipping git submodule: {path}")
|
|
||||||
continue
|
|
||||||
directories.append(str(path_obj))
|
|
||||||
else:
|
|
||||||
print(f"⚠️ Warning: Path '{path}' does not exist, skipping...")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Print summary of what we're processing
|
|
||||||
total_items = len(files) + len(directories)
|
|
||||||
items_desc = []
|
|
||||||
if files:
|
|
||||||
items_desc.append(f"{len(files)} file{'s' if len(files) > 1 else ''}")
|
|
||||||
if directories:
|
|
||||||
items_desc.append(
|
|
||||||
f"{len(directories)} director{'ies' if len(directories) > 1 else 'y'}"
|
|
||||||
)
|
|
||||||
|
|
||||||
print(f"Loading documents from {' and '.join(items_desc)} ({total_items} total):")
|
|
||||||
if files:
|
|
||||||
print(f" 📄 Files: {', '.join([Path(f).name for f in files])}")
|
|
||||||
if directories:
|
|
||||||
print(f" 📁 Directories: {', '.join(directories)}")
|
|
||||||
|
|
||||||
if custom_file_types:
|
|
||||||
print(f"Using custom file types: {custom_file_types}")
|
|
||||||
|
|
||||||
all_documents = []
|
|
||||||
|
|
||||||
# First, process individual files if any
|
|
||||||
if files:
|
|
||||||
print(f"\n🔄 Processing {len(files)} individual file{'s' if len(files) > 1 else ''}...")
|
|
||||||
|
|
||||||
# Load individual files using SimpleDirectoryReader with input_files
|
|
||||||
# Note: We skip gitignore filtering for explicitly specified files
|
|
||||||
try:
|
|
||||||
# Group files by their parent directory for efficient loading
|
|
||||||
from collections import defaultdict
|
|
||||||
|
|
||||||
files_by_dir = defaultdict(list)
|
|
||||||
for file_path in files:
|
|
||||||
parent_dir = str(Path(file_path).parent)
|
|
||||||
files_by_dir[parent_dir].append(file_path)
|
|
||||||
|
|
||||||
# Load files from each parent directory
|
|
||||||
for parent_dir, file_list in files_by_dir.items():
|
|
||||||
print(
|
|
||||||
f" Loading {len(file_list)} file{'s' if len(file_list) > 1 else ''} from {parent_dir}"
|
|
||||||
)
|
|
||||||
try:
|
|
||||||
file_docs = SimpleDirectoryReader(
|
|
||||||
parent_dir,
|
|
||||||
input_files=file_list,
|
|
||||||
filename_as_id=True,
|
|
||||||
).load_data()
|
|
||||||
all_documents.extend(file_docs)
|
|
||||||
print(
|
|
||||||
f" ✅ Loaded {len(file_docs)} document{'s' if len(file_docs) > 1 else ''}"
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
print(f" ❌ Warning: Could not load files from {parent_dir}: {e}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
print(f"❌ Error processing individual files: {e}")
|
|
||||||
|
|
||||||
# Define file extensions to process
|
|
||||||
if custom_file_types:
|
|
||||||
# Parse custom file types from comma-separated string
|
|
||||||
code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
|
|
||||||
# Ensure extensions start with a dot
|
|
||||||
code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
|
|
||||||
else:
|
|
||||||
# Use default supported file types
|
|
||||||
code_extensions = [
|
|
||||||
# Original document types
|
|
||||||
".txt",
|
|
||||||
".md",
|
|
||||||
".docx",
|
|
||||||
".pptx",
|
|
||||||
# Code files for Claude Code integration
|
|
||||||
".py",
|
|
||||||
".js",
|
|
||||||
".ts",
|
|
||||||
".jsx",
|
|
||||||
".tsx",
|
|
||||||
".java",
|
|
||||||
".cpp",
|
|
||||||
".c",
|
|
||||||
".h",
|
|
||||||
".hpp",
|
|
||||||
".cs",
|
|
||||||
".go",
|
|
||||||
".rs",
|
|
||||||
".rb",
|
|
||||||
".php",
|
|
||||||
".swift",
|
|
||||||
".kt",
|
|
||||||
".scala",
|
|
||||||
".r",
|
|
||||||
".sql",
|
|
||||||
".sh",
|
|
||||||
".bash",
|
|
||||||
".zsh",
|
|
||||||
".fish",
|
|
||||||
".ps1",
|
|
||||||
".bat",
|
|
||||||
# Config and markup files
|
|
||||||
".json",
|
|
||||||
".yaml",
|
|
||||||
".yml",
|
|
||||||
".xml",
|
|
||||||
".toml",
|
|
||||||
".ini",
|
|
||||||
".cfg",
|
|
||||||
".conf",
|
|
||||||
".html",
|
|
||||||
".css",
|
|
||||||
".scss",
|
|
||||||
".less",
|
|
||||||
".vue",
|
|
||||||
".svelte",
|
|
||||||
# Data science
|
|
||||||
".ipynb",
|
|
||||||
".R",
|
|
||||||
".py",
|
|
||||||
".jl",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Process each directory
|
|
||||||
if directories:
|
|
||||||
print(
|
|
||||||
f"\n🔄 Processing {len(directories)} director{'ies' if len(directories) > 1 else 'y'}..."
|
|
||||||
)
|
|
||||||
|
|
||||||
for docs_dir in directories:
|
|
||||||
print(f"Processing directory: {docs_dir}")
|
|
||||||
# Build gitignore parser for each directory
|
|
||||||
gitignore_matches = self._build_gitignore_parser(docs_dir)
|
|
||||||
|
|
||||||
# Try to use better PDF parsers first, but only if PDFs are requested
|
|
||||||
documents = []
|
|
||||||
docs_path = Path(docs_dir)
|
|
||||||
|
|
||||||
# Check if we should process PDFs
|
|
||||||
should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
|
|
||||||
|
|
||||||
if should_process_pdfs:
|
|
||||||
for file_path in docs_path.rglob("*.pdf"):
|
|
||||||
# Check if file matches any exclude pattern
|
|
||||||
try:
|
|
||||||
relative_path = file_path.relative_to(docs_path)
|
|
||||||
if self._should_exclude_file(relative_path, gitignore_matches):
|
|
||||||
continue
|
|
||||||
except ValueError:
|
|
||||||
# Skip files that can't be made relative to docs_path
|
|
||||||
print(f"⚠️ Skipping file outside directory scope: {file_path}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
print(f"Processing PDF: {file_path}")
|
|
||||||
|
|
||||||
# Try PyMuPDF first (best quality)
|
|
||||||
text = extract_pdf_text_with_pymupdf(str(file_path))
|
|
||||||
if text is None:
|
|
||||||
# Try pdfplumber
|
|
||||||
text = extract_pdf_text_with_pdfplumber(str(file_path))
|
|
||||||
|
|
||||||
if text:
|
|
||||||
# Create a simple document structure
|
|
||||||
from llama_index.core import Document
|
|
||||||
|
|
||||||
doc = Document(text=text, metadata={"source": str(file_path)})
|
|
||||||
documents.append(doc)
|
|
||||||
else:
|
|
||||||
# Fallback to default reader
|
|
||||||
print(f"Using default reader for {file_path}")
|
|
||||||
try:
|
|
||||||
default_docs = SimpleDirectoryReader(
|
|
||||||
str(file_path.parent),
|
|
||||||
filename_as_id=True,
|
|
||||||
required_exts=[file_path.suffix],
|
|
||||||
).load_data()
|
|
||||||
documents.extend(default_docs)
|
|
||||||
except Exception as e:
|
|
||||||
print(f"Warning: Could not process {file_path}: {e}")
|
|
||||||
|
|
||||||
# Load other file types with default reader
|
|
||||||
try:
|
|
||||||
# Create a custom file filter function using our PathSpec
|
|
||||||
def file_filter(
|
|
||||||
file_path: str, docs_dir=docs_dir, gitignore_matches=gitignore_matches
|
|
||||||
) -> bool:
|
|
||||||
"""Return True if file should be included (not excluded)"""
|
|
||||||
try:
|
|
||||||
docs_path_obj = Path(docs_dir)
|
|
||||||
file_path_obj = Path(file_path)
|
|
||||||
relative_path = file_path_obj.relative_to(docs_path_obj)
|
|
||||||
return not self._should_exclude_file(relative_path, gitignore_matches)
|
|
||||||
except (ValueError, OSError):
|
|
||||||
return True # Include files that can't be processed
|
|
||||||
|
|
||||||
other_docs = SimpleDirectoryReader(
|
|
||||||
docs_dir,
|
|
||||||
recursive=True,
|
|
||||||
encoding="utf-8",
|
|
||||||
required_exts=code_extensions,
|
|
||||||
file_extractor={}, # Use default extractors
|
|
||||||
filename_as_id=True,
|
filename_as_id=True,
|
||||||
).load_data(show_progress=True)
|
required_exts=[file_path.suffix],
|
||||||
|
).load_data()
|
||||||
|
documents.extend(default_docs)
|
||||||
|
|
||||||
# Filter documents after loading based on gitignore rules
|
# Load other file types with default reader
|
||||||
filtered_docs = []
|
other_docs = SimpleDirectoryReader(
|
||||||
for doc in other_docs:
|
docs_dir,
|
||||||
file_path = doc.metadata.get("file_path", "")
|
recursive=True,
|
||||||
if file_filter(file_path):
|
encoding="utf-8",
|
||||||
filtered_docs.append(doc)
|
required_exts=[".txt", ".md", ".docx"],
|
||||||
|
).load_data(show_progress=True)
|
||||||
documents.extend(filtered_docs)
|
documents.extend(other_docs)
|
||||||
except ValueError as e:
|
|
||||||
if "No files found" in str(e):
|
|
||||||
print(f"No additional files found for other supported types in {docs_dir}.")
|
|
||||||
else:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
all_documents.extend(documents)
|
|
||||||
print(f"Loaded {len(documents)} documents from {docs_dir}")
|
|
||||||
|
|
||||||
documents = all_documents
|
|
||||||
|
|
||||||
all_texts = []
|
all_texts = []
|
||||||
|
for doc in documents:
|
||||||
# Define code file extensions for intelligent chunking
|
nodes = self.node_parser.get_nodes_from_documents([doc])
|
||||||
code_file_exts = {
|
|
||||||
".py",
|
|
||||||
".js",
|
|
||||||
".ts",
|
|
||||||
".jsx",
|
|
||||||
".tsx",
|
|
||||||
".java",
|
|
||||||
".cpp",
|
|
||||||
".c",
|
|
||||||
".h",
|
|
||||||
".hpp",
|
|
||||||
".cs",
|
|
||||||
".go",
|
|
||||||
".rs",
|
|
||||||
".rb",
|
|
||||||
".php",
|
|
||||||
".swift",
|
|
||||||
".kt",
|
|
||||||
".scala",
|
|
||||||
".r",
|
|
||||||
".sql",
|
|
||||||
".sh",
|
|
||||||
".bash",
|
|
||||||
".zsh",
|
|
||||||
".fish",
|
|
||||||
".ps1",
|
|
||||||
".bat",
|
|
||||||
".json",
|
|
||||||
".yaml",
|
|
||||||
".yml",
|
|
||||||
".xml",
|
|
||||||
".toml",
|
|
||||||
".ini",
|
|
||||||
".cfg",
|
|
||||||
".conf",
|
|
||||||
".html",
|
|
||||||
".css",
|
|
||||||
".scss",
|
|
||||||
".less",
|
|
||||||
".vue",
|
|
||||||
".svelte",
|
|
||||||
".ipynb",
|
|
||||||
".R",
|
|
||||||
".jl",
|
|
||||||
}
|
|
||||||
|
|
||||||
print("start chunking documents")
|
|
||||||
# Add progress bar for document chunking
|
|
||||||
for doc in tqdm(documents, desc="Chunking documents", unit="doc"):
|
|
||||||
# Check if this is a code file based on source path
|
|
||||||
source_path = doc.metadata.get("source", "")
|
|
||||||
is_code_file = any(source_path.endswith(ext) for ext in code_file_exts)
|
|
||||||
|
|
||||||
# Use appropriate parser based on file type
|
|
||||||
parser = self.code_parser if is_code_file else self.node_parser
|
|
||||||
nodes = parser.get_nodes_from_documents([doc])
|
|
||||||
|
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
all_texts.append(node.get_content())
|
all_texts.append(node.get_content())
|
||||||
|
|
||||||
@@ -658,36 +214,16 @@ Examples:
|
|||||||
return all_texts
|
return all_texts
|
||||||
|
|
||||||
async def build_index(self, args):
|
async def build_index(self, args):
|
||||||
docs_paths = args.docs
|
docs_dir = args.docs
|
||||||
# Use current directory name if index_name not provided
|
index_name = args.index_name
|
||||||
if args.index_name:
|
|
||||||
index_name = args.index_name
|
|
||||||
else:
|
|
||||||
index_name = Path.cwd().name
|
|
||||||
print(f"Using current directory name as index: '{index_name}'")
|
|
||||||
|
|
||||||
index_dir = self.indexes_dir / index_name
|
index_dir = self.indexes_dir / index_name
|
||||||
index_path = self.get_index_path(index_name)
|
index_path = self.get_index_path(index_name)
|
||||||
|
|
||||||
# Display all paths being indexed with file/directory distinction
|
|
||||||
files = [p for p in docs_paths if Path(p).is_file()]
|
|
||||||
directories = [p for p in docs_paths if Path(p).is_dir()]
|
|
||||||
|
|
||||||
print(f"📂 Indexing {len(docs_paths)} path{'s' if len(docs_paths) > 1 else ''}:")
|
|
||||||
if files:
|
|
||||||
print(f" 📄 Files ({len(files)}):")
|
|
||||||
for i, file_path in enumerate(files, 1):
|
|
||||||
print(f" {i}. {Path(file_path).resolve()}")
|
|
||||||
if directories:
|
|
||||||
print(f" 📁 Directories ({len(directories)}):")
|
|
||||||
for i, dir_path in enumerate(directories, 1):
|
|
||||||
print(f" {i}. {Path(dir_path).resolve()}")
|
|
||||||
|
|
||||||
if index_dir.exists() and not args.force:
|
if index_dir.exists() and not args.force:
|
||||||
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
|
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
|
||||||
return
|
return
|
||||||
|
|
||||||
all_texts = self.load_documents(docs_paths, args.file_types)
|
all_texts = self.load_documents(docs_dir)
|
||||||
if not all_texts:
|
if not all_texts:
|
||||||
print("No documents found")
|
print("No documents found")
|
||||||
return
|
return
|
||||||
@@ -699,7 +235,6 @@ Examples:
|
|||||||
builder = LeannBuilder(
|
builder = LeannBuilder(
|
||||||
backend_name=args.backend,
|
backend_name=args.backend,
|
||||||
embedding_model=args.embedding_model,
|
embedding_model=args.embedding_model,
|
||||||
embedding_mode=args.embedding_mode,
|
|
||||||
graph_degree=args.graph_degree,
|
graph_degree=args.graph_degree,
|
||||||
complexity=args.complexity,
|
complexity=args.complexity,
|
||||||
is_compact=args.compact,
|
is_compact=args.compact,
|
||||||
@@ -713,9 +248,6 @@ Examples:
|
|||||||
builder.build_index(index_path)
|
builder.build_index(index_path)
|
||||||
print(f"Index built at {index_path}")
|
print(f"Index built at {index_path}")
|
||||||
|
|
||||||
# Register this project directory in global registry
|
|
||||||
self.register_project_dir()
|
|
||||||
|
|
||||||
async def search_documents(self, args):
|
async def search_documents(self, args):
|
||||||
index_name = args.index_name
|
index_name = args.index_name
|
||||||
query = args.query
|
query = args.query
|
||||||
@@ -723,7 +255,7 @@ Examples:
|
|||||||
|
|
||||||
if not self.index_exists(index_name):
|
if not self.index_exists(index_name):
|
||||||
print(
|
print(
|
||||||
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
|
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -750,7 +282,7 @@ Examples:
|
|||||||
|
|
||||||
if not self.index_exists(index_name):
|
if not self.index_exists(index_name):
|
||||||
print(
|
print(
|
||||||
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
|
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir>' to create it."
|
||||||
)
|
)
|
||||||
return
|
return
|
||||||
|
|
||||||
@@ -776,11 +308,6 @@ Examples:
|
|||||||
if not user_input:
|
if not user_input:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Prepare LLM kwargs with thinking budget if specified
|
|
||||||
llm_kwargs = {}
|
|
||||||
if args.thinking_budget:
|
|
||||||
llm_kwargs["thinking_budget"] = args.thinking_budget
|
|
||||||
|
|
||||||
response = chat.ask(
|
response = chat.ask(
|
||||||
user_input,
|
user_input,
|
||||||
top_k=args.top_k,
|
top_k=args.top_k,
|
||||||
@@ -789,17 +316,11 @@ Examples:
|
|||||||
prune_ratio=args.prune_ratio,
|
prune_ratio=args.prune_ratio,
|
||||||
recompute_embeddings=args.recompute_embeddings,
|
recompute_embeddings=args.recompute_embeddings,
|
||||||
pruning_strategy=args.pruning_strategy,
|
pruning_strategy=args.pruning_strategy,
|
||||||
llm_kwargs=llm_kwargs,
|
|
||||||
)
|
)
|
||||||
print(f"LEANN: {response}")
|
print(f"LEANN: {response}")
|
||||||
else:
|
else:
|
||||||
query = input("Enter your question: ").strip()
|
query = input("Enter your question: ").strip()
|
||||||
if query:
|
if query:
|
||||||
# Prepare LLM kwargs with thinking budget if specified
|
|
||||||
llm_kwargs = {}
|
|
||||||
if args.thinking_budget:
|
|
||||||
llm_kwargs["thinking_budget"] = args.thinking_budget
|
|
||||||
|
|
||||||
response = chat.ask(
|
response = chat.ask(
|
||||||
query,
|
query,
|
||||||
top_k=args.top_k,
|
top_k=args.top_k,
|
||||||
@@ -808,7 +329,6 @@ Examples:
|
|||||||
prune_ratio=args.prune_ratio,
|
prune_ratio=args.prune_ratio,
|
||||||
recompute_embeddings=args.recompute_embeddings,
|
recompute_embeddings=args.recompute_embeddings,
|
||||||
pruning_strategy=args.pruning_strategy,
|
pruning_strategy=args.pruning_strategy,
|
||||||
llm_kwargs=llm_kwargs,
|
|
||||||
)
|
)
|
||||||
print(f"LEANN: {response}")
|
print(f"LEANN: {response}")
|
||||||
|
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ def compute_embeddings(
|
|||||||
Args:
|
Args:
|
||||||
texts: List of texts to compute embeddings for
|
texts: List of texts to compute embeddings for
|
||||||
model_name: Model name
|
model_name: Model name
|
||||||
mode: Computation mode ('sentence-transformers', 'openai', 'mlx', 'ollama')
|
mode: Computation mode ('sentence-transformers', 'openai', 'mlx')
|
||||||
is_build: Whether this is a build operation (shows progress bar)
|
is_build: Whether this is a build operation (shows progress bar)
|
||||||
batch_size: Batch size for processing
|
batch_size: Batch size for processing
|
||||||
adaptive_optimization: Whether to use adaptive optimization based on batch size
|
adaptive_optimization: Whether to use adaptive optimization based on batch size
|
||||||
@@ -55,8 +55,6 @@ def compute_embeddings(
|
|||||||
return compute_embeddings_openai(texts, model_name)
|
return compute_embeddings_openai(texts, model_name)
|
||||||
elif mode == "mlx":
|
elif mode == "mlx":
|
||||||
return compute_embeddings_mlx(texts, model_name)
|
return compute_embeddings_mlx(texts, model_name)
|
||||||
elif mode == "ollama":
|
|
||||||
return compute_embeddings_ollama(texts, model_name, is_build=is_build)
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported embedding mode: {mode}")
|
raise ValueError(f"Unsupported embedding mode: {mode}")
|
||||||
|
|
||||||
@@ -367,286 +365,3 @@ def compute_embeddings_mlx(chunks: list[str], model_name: str, batch_size: int =
|
|||||||
|
|
||||||
# Stack numpy arrays
|
# Stack numpy arrays
|
||||||
return np.stack(all_embeddings)
|
return np.stack(all_embeddings)
|
||||||
|
|
||||||
|
|
||||||
def compute_embeddings_ollama(
|
|
||||||
texts: list[str], model_name: str, is_build: bool = False, host: str = "http://localhost:11434"
|
|
||||||
) -> np.ndarray:
|
|
||||||
"""
|
|
||||||
Compute embeddings using Ollama API with simplified batch processing.
|
|
||||||
|
|
||||||
Uses batch size of 32 for MPS/CPU and 128 for CUDA to optimize performance.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
texts: List of texts to compute embeddings for
|
|
||||||
model_name: Ollama model name (e.g., "nomic-embed-text", "mxbai-embed-large")
|
|
||||||
is_build: Whether this is a build operation (shows progress bar)
|
|
||||||
host: Ollama host URL (default: http://localhost:11434)
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Normalized embeddings array, shape: (len(texts), embedding_dim)
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
import requests
|
|
||||||
except ImportError:
|
|
||||||
raise ImportError(
|
|
||||||
"The 'requests' library is required for Ollama embeddings. Install with: uv pip install requests"
|
|
||||||
)
|
|
||||||
|
|
||||||
if not texts:
|
|
||||||
raise ValueError("Cannot compute embeddings for empty text list")
|
|
||||||
|
|
||||||
logger.info(
|
|
||||||
f"Computing embeddings for {len(texts)} texts using Ollama API, model: '{model_name}'"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Check if Ollama is running
|
|
||||||
try:
|
|
||||||
response = requests.get(f"{host}/api/version", timeout=5)
|
|
||||||
response.raise_for_status()
|
|
||||||
except requests.exceptions.ConnectionError:
|
|
||||||
error_msg = (
|
|
||||||
f"❌ Could not connect to Ollama at {host}.\n\n"
|
|
||||||
"Please ensure Ollama is running:\n"
|
|
||||||
" • macOS/Linux: ollama serve\n"
|
|
||||||
" • Windows: Make sure Ollama is running in the system tray\n\n"
|
|
||||||
"Installation: https://ollama.com/download"
|
|
||||||
)
|
|
||||||
raise RuntimeError(error_msg)
|
|
||||||
except Exception as e:
|
|
||||||
raise RuntimeError(f"Unexpected error connecting to Ollama: {e}")
|
|
||||||
|
|
||||||
# Check if model exists and provide helpful suggestions
|
|
||||||
try:
|
|
||||||
response = requests.get(f"{host}/api/tags", timeout=5)
|
|
||||||
response.raise_for_status()
|
|
||||||
models = response.json()
|
|
||||||
model_names = [model["name"] for model in models.get("models", [])]
|
|
||||||
|
|
||||||
# Filter for embedding models (models that support embeddings)
|
|
||||||
embedding_models = []
|
|
||||||
suggested_embedding_models = [
|
|
||||||
"nomic-embed-text",
|
|
||||||
"mxbai-embed-large",
|
|
||||||
"bge-m3",
|
|
||||||
"all-minilm",
|
|
||||||
"snowflake-arctic-embed",
|
|
||||||
]
|
|
||||||
|
|
||||||
for model in model_names:
|
|
||||||
# Check if it's an embedding model (by name patterns or known models)
|
|
||||||
base_name = model.split(":")[0]
|
|
||||||
if any(emb in base_name for emb in ["embed", "bge", "minilm", "e5"]):
|
|
||||||
embedding_models.append(model)
|
|
||||||
|
|
||||||
# Check if model exists (handle versioned names) and resolve to full name
|
|
||||||
resolved_model_name = None
|
|
||||||
for name in model_names:
|
|
||||||
# Exact match
|
|
||||||
if model_name == name:
|
|
||||||
resolved_model_name = name
|
|
||||||
break
|
|
||||||
# Match without version tag (use the versioned name)
|
|
||||||
elif model_name == name.split(":")[0]:
|
|
||||||
resolved_model_name = name
|
|
||||||
break
|
|
||||||
|
|
||||||
if not resolved_model_name:
|
|
||||||
error_msg = f"❌ Model '{model_name}' not found in local Ollama.\n\n"
|
|
||||||
|
|
||||||
# Suggest pulling the model
|
|
||||||
error_msg += "📦 To install this embedding model:\n"
|
|
||||||
error_msg += f" ollama pull {model_name}\n\n"
|
|
||||||
|
|
||||||
# Show available embedding models
|
|
||||||
if embedding_models:
|
|
||||||
error_msg += "✅ Available embedding models:\n"
|
|
||||||
for model in embedding_models[:5]:
|
|
||||||
error_msg += f" • {model}\n"
|
|
||||||
if len(embedding_models) > 5:
|
|
||||||
error_msg += f" ... and {len(embedding_models) - 5} more\n"
|
|
||||||
else:
|
|
||||||
error_msg += "💡 Popular embedding models to install:\n"
|
|
||||||
for model in suggested_embedding_models[:3]:
|
|
||||||
error_msg += f" • ollama pull {model}\n"
|
|
||||||
|
|
||||||
error_msg += "\n📚 Browse more: https://ollama.com/library"
|
|
||||||
raise ValueError(error_msg)
|
|
||||||
|
|
||||||
# Use the resolved model name for all subsequent operations
|
|
||||||
if resolved_model_name != model_name:
|
|
||||||
logger.info(f"Resolved model name '{model_name}' to '{resolved_model_name}'")
|
|
||||||
model_name = resolved_model_name
|
|
||||||
|
|
||||||
# Verify the model supports embeddings by testing it
|
|
||||||
try:
|
|
||||||
test_response = requests.post(
|
|
||||||
f"{host}/api/embeddings", json={"model": model_name, "prompt": "test"}, timeout=10
|
|
||||||
)
|
|
||||||
if test_response.status_code != 200:
|
|
||||||
error_msg = (
|
|
||||||
f"⚠️ Model '{model_name}' exists but may not support embeddings.\n\n"
|
|
||||||
f"Please use an embedding model like:\n"
|
|
||||||
)
|
|
||||||
for model in suggested_embedding_models[:3]:
|
|
||||||
error_msg += f" • {model}\n"
|
|
||||||
raise ValueError(error_msg)
|
|
||||||
except requests.exceptions.RequestException:
|
|
||||||
# If test fails, continue anyway - model might still work
|
|
||||||
pass
|
|
||||||
|
|
||||||
except requests.exceptions.RequestException as e:
|
|
||||||
logger.warning(f"Could not verify model existence: {e}")
|
|
||||||
|
|
||||||
# Determine batch size based on device availability
|
|
||||||
# Check for CUDA/MPS availability using torch if available
|
|
||||||
batch_size = 32 # Default for MPS/CPU
|
|
||||||
try:
|
|
||||||
import torch
|
|
||||||
|
|
||||||
if torch.cuda.is_available():
|
|
||||||
batch_size = 128 # CUDA gets larger batch size
|
|
||||||
elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
|
|
||||||
batch_size = 32 # MPS gets smaller batch size
|
|
||||||
except ImportError:
|
|
||||||
# If torch is not available, use conservative batch size
|
|
||||||
batch_size = 32
|
|
||||||
|
|
||||||
logger.info(f"Using batch size: {batch_size}")
|
|
||||||
|
|
||||||
def get_batch_embeddings(batch_texts):
|
|
||||||
"""Get embeddings for a batch of texts."""
|
|
||||||
all_embeddings = []
|
|
||||||
failed_indices = []
|
|
||||||
|
|
||||||
for i, text in enumerate(batch_texts):
|
|
||||||
max_retries = 3
|
|
||||||
retry_count = 0
|
|
||||||
|
|
||||||
# Truncate very long texts to avoid API issues
|
|
||||||
truncated_text = text[:8000] if len(text) > 8000 else text
|
|
||||||
while retry_count < max_retries:
|
|
||||||
try:
|
|
||||||
response = requests.post(
|
|
||||||
f"{host}/api/embeddings",
|
|
||||||
json={"model": model_name, "prompt": truncated_text},
|
|
||||||
timeout=30,
|
|
||||||
)
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
result = response.json()
|
|
||||||
embedding = result.get("embedding")
|
|
||||||
|
|
||||||
if embedding is None:
|
|
||||||
raise ValueError(f"No embedding returned for text {i}")
|
|
||||||
|
|
||||||
if not isinstance(embedding, list) or len(embedding) == 0:
|
|
||||||
raise ValueError(f"Invalid embedding format for text {i}")
|
|
||||||
|
|
||||||
all_embeddings.append(embedding)
|
|
||||||
break
|
|
||||||
|
|
||||||
except requests.exceptions.Timeout:
|
|
||||||
retry_count += 1
|
|
||||||
if retry_count >= max_retries:
|
|
||||||
logger.warning(f"Timeout for text {i} after {max_retries} retries")
|
|
||||||
failed_indices.append(i)
|
|
||||||
all_embeddings.append(None)
|
|
||||||
break
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
retry_count += 1
|
|
||||||
if retry_count >= max_retries:
|
|
||||||
logger.error(f"Failed to get embedding for text {i}: {e}")
|
|
||||||
failed_indices.append(i)
|
|
||||||
all_embeddings.append(None)
|
|
||||||
break
|
|
||||||
return all_embeddings, failed_indices
|
|
||||||
|
|
||||||
# Process texts in batches
|
|
||||||
all_embeddings = []
|
|
||||||
all_failed_indices = []
|
|
||||||
|
|
||||||
# Setup progress bar if needed
|
|
||||||
show_progress = is_build or len(texts) > 10
|
|
||||||
try:
|
|
||||||
if show_progress:
|
|
||||||
from tqdm import tqdm
|
|
||||||
except ImportError:
|
|
||||||
show_progress = False
|
|
||||||
|
|
||||||
# Process batches
|
|
||||||
num_batches = (len(texts) + batch_size - 1) // batch_size
|
|
||||||
|
|
||||||
if show_progress:
|
|
||||||
batch_iterator = tqdm(range(num_batches), desc="Computing Ollama embeddings")
|
|
||||||
else:
|
|
||||||
batch_iterator = range(num_batches)
|
|
||||||
|
|
||||||
for batch_idx in batch_iterator:
|
|
||||||
start_idx = batch_idx * batch_size
|
|
||||||
end_idx = min(start_idx + batch_size, len(texts))
|
|
||||||
batch_texts = texts[start_idx:end_idx]
|
|
||||||
|
|
||||||
batch_embeddings, batch_failed = get_batch_embeddings(batch_texts)
|
|
||||||
|
|
||||||
# Adjust failed indices to global indices
|
|
||||||
global_failed = [start_idx + idx for idx in batch_failed]
|
|
||||||
all_failed_indices.extend(global_failed)
|
|
||||||
all_embeddings.extend(batch_embeddings)
|
|
||||||
|
|
||||||
# Handle failed embeddings
|
|
||||||
if all_failed_indices:
|
|
||||||
if len(all_failed_indices) == len(texts):
|
|
||||||
raise RuntimeError("Failed to compute any embeddings")
|
|
||||||
|
|
||||||
logger.warning(
|
|
||||||
f"Failed to compute embeddings for {len(all_failed_indices)}/{len(texts)} texts"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Use zero embeddings as fallback for failed ones
|
|
||||||
valid_embedding = next((e for e in all_embeddings if e is not None), None)
|
|
||||||
if valid_embedding:
|
|
||||||
embedding_dim = len(valid_embedding)
|
|
||||||
for i, embedding in enumerate(all_embeddings):
|
|
||||||
if embedding is None:
|
|
||||||
all_embeddings[i] = [0.0] * embedding_dim
|
|
||||||
|
|
||||||
# Remove None values
|
|
||||||
all_embeddings = [e for e in all_embeddings if e is not None]
|
|
||||||
|
|
||||||
if not all_embeddings:
|
|
||||||
raise RuntimeError("No valid embeddings were computed")
|
|
||||||
|
|
||||||
# Validate embedding dimensions
|
|
||||||
expected_dim = len(all_embeddings[0])
|
|
||||||
inconsistent_dims = []
|
|
||||||
for i, embedding in enumerate(all_embeddings):
|
|
||||||
if len(embedding) != expected_dim:
|
|
||||||
inconsistent_dims.append((i, len(embedding)))
|
|
||||||
|
|
||||||
if inconsistent_dims:
|
|
||||||
error_msg = f"Ollama returned inconsistent embedding dimensions. Expected {expected_dim}, but got:\n"
|
|
||||||
for idx, dim in inconsistent_dims[:10]: # Show first 10 inconsistent ones
|
|
||||||
error_msg += f" - Text {idx}: {dim} dimensions\n"
|
|
||||||
if len(inconsistent_dims) > 10:
|
|
||||||
error_msg += f" ... and {len(inconsistent_dims) - 10} more\n"
|
|
||||||
error_msg += f"\nThis is likely an Ollama API bug with model '{model_name}'. Please try:\n"
|
|
||||||
error_msg += "1. Restart Ollama service: 'ollama serve'\n"
|
|
||||||
error_msg += f"2. Re-pull the model: 'ollama pull {model_name}'\n"
|
|
||||||
error_msg += (
|
|
||||||
"3. Use sentence-transformers instead: --embedding-mode sentence-transformers\n"
|
|
||||||
)
|
|
||||||
error_msg += "4. Report this issue to Ollama: https://github.com/ollama/ollama/issues"
|
|
||||||
raise ValueError(error_msg)
|
|
||||||
|
|
||||||
# Convert to numpy array and normalize
|
|
||||||
embeddings = np.array(all_embeddings, dtype=np.float32)
|
|
||||||
|
|
||||||
# Normalize embeddings (L2 normalization)
|
|
||||||
norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
|
|
||||||
embeddings = embeddings / (norms + 1e-8) # Add small epsilon to avoid division by zero
|
|
||||||
|
|
||||||
logger.info(f"Generated {len(embeddings)} embeddings, dimension: {embeddings.shape[1]}")
|
|
||||||
|
|
||||||
return embeddings
|
|
||||||
|
|||||||
@@ -6,7 +6,6 @@ import subprocess
|
|||||||
import sys
|
import sys
|
||||||
import time
|
import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
|
|
||||||
@@ -183,8 +182,8 @@ class EmbeddingServerManager:
|
|||||||
e.g., "leann_backend_diskann.embedding_server"
|
e.g., "leann_backend_diskann.embedding_server"
|
||||||
"""
|
"""
|
||||||
self.backend_module_name = backend_module_name
|
self.backend_module_name = backend_module_name
|
||||||
self.server_process: Optional[subprocess.Popen] = None
|
self.server_process: subprocess.Popen | None = None
|
||||||
self.server_port: Optional[int] = None
|
self.server_port: int | None = None
|
||||||
self._atexit_registered = False
|
self._atexit_registered = False
|
||||||
|
|
||||||
def start_server(
|
def start_server(
|
||||||
@@ -355,21 +354,13 @@ class EmbeddingServerManager:
|
|||||||
self.server_process.terminate()
|
self.server_process.terminate()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.server_process.wait(timeout=3)
|
self.server_process.wait(timeout=5)
|
||||||
logger.info(f"Server process {self.server_process.pid} terminated.")
|
logger.info(f"Server process {self.server_process.pid} terminated.")
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Server process {self.server_process.pid} did not terminate gracefully within 3 seconds, killing it."
|
f"Server process {self.server_process.pid} did not terminate gracefully, killing it."
|
||||||
)
|
)
|
||||||
self.server_process.kill()
|
self.server_process.kill()
|
||||||
try:
|
|
||||||
self.server_process.wait(timeout=2)
|
|
||||||
logger.info(f"Server process {self.server_process.pid} killed successfully.")
|
|
||||||
except subprocess.TimeoutExpired:
|
|
||||||
logger.error(
|
|
||||||
f"Failed to kill server process {self.server_process.pid} - it may be hung"
|
|
||||||
)
|
|
||||||
# Don't hang indefinitely
|
|
||||||
|
|
||||||
# Clean up process resources to prevent resource tracker warnings
|
# Clean up process resources to prevent resource tracker warnings
|
||||||
try:
|
try:
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Any, Literal, Union
|
from typing import Any, Literal
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -34,9 +34,7 @@ class LeannBackendSearcherInterface(ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def _ensure_server_running(
|
def _ensure_server_running(self, passages_source_file: str, port: int | None, **kwargs) -> int:
|
||||||
self, passages_source_file: str, port: Union[int, None], **kwargs
|
|
||||||
) -> int:
|
|
||||||
"""Ensure server is running"""
|
"""Ensure server is running"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -50,7 +48,7 @@ class LeannBackendSearcherInterface(ABC):
|
|||||||
prune_ratio: float = 0.0,
|
prune_ratio: float = 0.0,
|
||||||
recompute_embeddings: bool = False,
|
recompute_embeddings: bool = False,
|
||||||
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
||||||
zmq_port: Union[int, None] = None,
|
zmq_port: int | None = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Search for nearest neighbors
|
"""Search for nearest neighbors
|
||||||
@@ -76,7 +74,7 @@ class LeannBackendSearcherInterface(ABC):
|
|||||||
self,
|
self,
|
||||||
query: str,
|
query: str,
|
||||||
use_server_if_available: bool = True,
|
use_server_if_available: bool = True,
|
||||||
zmq_port: Union[int, None] = None,
|
zmq_port: int | None = None,
|
||||||
) -> np.ndarray:
|
) -> np.ndarray:
|
||||||
"""Compute embedding for a query string
|
"""Compute embedding for a query string
|
||||||
|
|
||||||
|
|||||||
@@ -1,176 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import json
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
def handle_request(request):
|
|
||||||
if request.get("method") == "initialize":
|
|
||||||
return {
|
|
||||||
"jsonrpc": "2.0",
|
|
||||||
"id": request.get("id"),
|
|
||||||
"result": {
|
|
||||||
"capabilities": {"tools": {}},
|
|
||||||
"protocolVersion": "2024-11-05",
|
|
||||||
"serverInfo": {"name": "leann-mcp", "version": "1.0.0"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
elif request.get("method") == "tools/list":
|
|
||||||
return {
|
|
||||||
"jsonrpc": "2.0",
|
|
||||||
"id": request.get("id"),
|
|
||||||
"result": {
|
|
||||||
"tools": [
|
|
||||||
{
|
|
||||||
"name": "leann_search",
|
|
||||||
"description": """🔍 Search code using natural language - like having a coding assistant who knows your entire codebase!
|
|
||||||
|
|
||||||
🎯 **Perfect for**:
|
|
||||||
- "How does authentication work?" → finds auth-related code
|
|
||||||
- "Error handling patterns" → locates try-catch blocks and error logic
|
|
||||||
- "Database connection setup" → finds DB initialization code
|
|
||||||
- "API endpoint definitions" → locates route handlers
|
|
||||||
- "Configuration management" → finds config files and usage
|
|
||||||
|
|
||||||
💡 **Pro tip**: Use this before making any changes to understand existing patterns and conventions.""",
|
|
||||||
"inputSchema": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"index_name": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Name of the LEANN index to search. Use 'leann_list' first to see available indexes.",
|
|
||||||
},
|
|
||||||
"query": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Search query - can be natural language (e.g., 'how to handle errors') or technical terms (e.g., 'async function definition')",
|
|
||||||
},
|
|
||||||
"top_k": {
|
|
||||||
"type": "integer",
|
|
||||||
"default": 5,
|
|
||||||
"minimum": 1,
|
|
||||||
"maximum": 20,
|
|
||||||
"description": "Number of search results to return. Use 5-10 for focused results, 15-20 for comprehensive exploration.",
|
|
||||||
},
|
|
||||||
"complexity": {
|
|
||||||
"type": "integer",
|
|
||||||
"default": 32,
|
|
||||||
"minimum": 16,
|
|
||||||
"maximum": 128,
|
|
||||||
"description": "Search complexity level. Use 16-32 for fast searches (recommended), 64+ for higher precision when needed.",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
"required": ["index_name", "query"],
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "leann_status",
|
|
||||||
"description": "📊 Check the health and stats of your code indexes - like a medical checkup for your codebase knowledge!",
|
|
||||||
"inputSchema": {
|
|
||||||
"type": "object",
|
|
||||||
"properties": {
|
|
||||||
"index_name": {
|
|
||||||
"type": "string",
|
|
||||||
"description": "Optional: Name of specific index to check. If not provided, shows status of all indexes.",
|
|
||||||
}
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "leann_list",
|
|
||||||
"description": "📋 Show all your indexed codebases - your personal code library! Use this to see what's available for search.",
|
|
||||||
"inputSchema": {"type": "object", "properties": {}},
|
|
||||||
},
|
|
||||||
]
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
elif request.get("method") == "tools/call":
|
|
||||||
tool_name = request["params"]["name"]
|
|
||||||
args = request["params"].get("arguments", {})
|
|
||||||
|
|
||||||
try:
|
|
||||||
if tool_name == "leann_search":
|
|
||||||
# Validate required parameters
|
|
||||||
if not args.get("index_name") or not args.get("query"):
|
|
||||||
return {
|
|
||||||
"jsonrpc": "2.0",
|
|
||||||
"id": request.get("id"),
|
|
||||||
"result": {
|
|
||||||
"content": [
|
|
||||||
{
|
|
||||||
"type": "text",
|
|
||||||
"text": "Error: Both index_name and query are required",
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
# Build simplified command
|
|
||||||
cmd = [
|
|
||||||
"leann",
|
|
||||||
"search",
|
|
||||||
args["index_name"],
|
|
||||||
args["query"],
|
|
||||||
f"--top-k={args.get('top_k', 5)}",
|
|
||||||
f"--complexity={args.get('complexity', 32)}",
|
|
||||||
]
|
|
||||||
|
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
|
||||||
|
|
||||||
elif tool_name == "leann_status":
|
|
||||||
if args.get("index_name"):
|
|
||||||
# Check specific index status - for now, we'll use leann list and filter
|
|
||||||
result = subprocess.run(["leann", "list"], capture_output=True, text=True)
|
|
||||||
# We could enhance this to show more detailed status per index
|
|
||||||
else:
|
|
||||||
# Show all indexes status
|
|
||||||
result = subprocess.run(["leann", "list"], capture_output=True, text=True)
|
|
||||||
|
|
||||||
elif tool_name == "leann_list":
|
|
||||||
result = subprocess.run(["leann", "list"], capture_output=True, text=True)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"jsonrpc": "2.0",
|
|
||||||
"id": request.get("id"),
|
|
||||||
"result": {
|
|
||||||
"content": [
|
|
||||||
{
|
|
||||||
"type": "text",
|
|
||||||
"text": result.stdout
|
|
||||||
if result.returncode == 0
|
|
||||||
else f"Error: {result.stderr}",
|
|
||||||
}
|
|
||||||
]
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
return {
|
|
||||||
"jsonrpc": "2.0",
|
|
||||||
"id": request.get("id"),
|
|
||||||
"error": {"code": -1, "message": str(e)},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
|
||||||
for line in sys.stdin:
|
|
||||||
try:
|
|
||||||
request = json.loads(line.strip())
|
|
||||||
response = handle_request(request)
|
|
||||||
if response:
|
|
||||||
print(json.dumps(response))
|
|
||||||
sys.stdout.flush()
|
|
||||||
except Exception as e:
|
|
||||||
error_response = {
|
|
||||||
"jsonrpc": "2.0",
|
|
||||||
"id": None,
|
|
||||||
"error": {"code": -1, "message": str(e)},
|
|
||||||
}
|
|
||||||
print(json.dumps(error_response))
|
|
||||||
sys.stdout.flush()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
import json
|
import json
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Literal, Optional
|
from typing import Any, Literal
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
@@ -169,7 +169,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC):
|
|||||||
prune_ratio: float = 0.0,
|
prune_ratio: float = 0.0,
|
||||||
recompute_embeddings: bool = False,
|
recompute_embeddings: bool = False,
|
||||||
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
pruning_strategy: Literal["global", "local", "proportional"] = "global",
|
||||||
zmq_port: Optional[int] = None,
|
zmq_port: int | None = None,
|
||||||
**kwargs,
|
**kwargs,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,127 +0,0 @@
|
|||||||
# 🔥 LEANN Claude Code Integration
|
|
||||||
|
|
||||||
Transform your development workflow with intelligent code assistance using LEANN's semantic search directly in Claude Code.
|
|
||||||
|
|
||||||
## Prerequisites
|
|
||||||
|
|
||||||
**Step 1:** First, complete the basic LEANN installation following the [📦 Installation guide](../../README.md#installation) in the root README:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
uv venv
|
|
||||||
source .venv/bin/activate
|
|
||||||
uv pip install leann
|
|
||||||
```
|
|
||||||
|
|
||||||
**Step 2:** Install LEANN globally for MCP integration:
|
|
||||||
```bash
|
|
||||||
uv tool install leann-core
|
|
||||||
```
|
|
||||||
|
|
||||||
This makes the `leann` command available system-wide, which `leann_mcp` requires.
|
|
||||||
|
|
||||||
## 🚀 Quick Setup
|
|
||||||
|
|
||||||
Add the LEANN MCP server to Claude Code:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
claude mcp add leann-server -- leann_mcp
|
|
||||||
```
|
|
||||||
|
|
||||||
## 🛠️ Available Tools
|
|
||||||
|
|
||||||
Once connected, you'll have access to these powerful semantic search tools in Claude Code:
|
|
||||||
|
|
||||||
- **`leann_list`** - List all available indexes across your projects
|
|
||||||
- **`leann_search`** - Perform semantic searches across code and documents
|
|
||||||
- **`leann_ask`** - Ask natural language questions and get AI-powered answers from your codebase
|
|
||||||
|
|
||||||
## 🎯 Quick Start Example
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# Build an index for your project (change to your actual path)
|
|
||||||
leann build my-project --docs ./
|
|
||||||
|
|
||||||
# Start Claude Code
|
|
||||||
claude
|
|
||||||
```
|
|
||||||
|
|
||||||
## 🚀 Advanced Usage Examples
|
|
||||||
|
|
||||||
### Index Entire Git Repository
|
|
||||||
```bash
|
|
||||||
# Index all tracked files in your git repository, note right now we will skip submodules, but we can add it back easily if you want
|
|
||||||
leann build my-repo --docs $(git ls-files) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
|
|
||||||
# Index only specific file types from git
|
|
||||||
leann build my-python-code --docs $(git ls-files "*.py") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
```
|
|
||||||
|
|
||||||
### Multiple Directories and Files
|
|
||||||
```bash
|
|
||||||
# Index multiple directories
|
|
||||||
leann build my-codebase --docs ./src ./tests ./docs ./config --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
|
|
||||||
# Mix files and directories
|
|
||||||
leann build my-project --docs ./README.md ./src/ ./package.json ./docs/ --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
|
|
||||||
# Specific files only
|
|
||||||
leann build my-configs --docs ./tsconfig.json ./package.json ./webpack.config.js --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
```
|
|
||||||
|
|
||||||
### Advanced Git Integration
|
|
||||||
```bash
|
|
||||||
# Index recently modified files
|
|
||||||
leann build recent-changes --docs $(git diff --name-only HEAD~10..HEAD) --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
|
|
||||||
# Index files matching pattern
|
|
||||||
leann build frontend --docs $(git ls-files "*.tsx" "*.ts" "*.jsx" "*.js") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
|
|
||||||
# Index documentation and config files
|
|
||||||
leann build docs-and-configs --docs $(git ls-files "*.md" "*.yml" "*.yaml" "*.json" "*.toml") --embedding-mode sentence-transformers --embedding-model all-MiniLM-L6-v2 --backend hnsw
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|
||||||
**Try this in Claude Code:**
|
|
||||||
```
|
|
||||||
Help me understand this codebase. List available indexes and search for authentication patterns.
|
|
||||||
```
|
|
||||||
|
|
||||||
<p align="center">
|
|
||||||
<img src="../../assets/claude_code_leann.png" alt="LEANN in Claude Code" width="80%">
|
|
||||||
</p>
|
|
||||||
|
|
||||||
|
|
||||||
## 🧠 How It Works
|
|
||||||
|
|
||||||
The integration consists of three key components working seamlessly together:
|
|
||||||
|
|
||||||
- **`leann`** - Core CLI tool for indexing and searching (installed globally via `uv tool install`)
|
|
||||||
- **`leann_mcp`** - MCP server that wraps `leann` commands for Claude Code integration
|
|
||||||
- **Claude Code** - Calls `leann_mcp`, which executes `leann` commands and returns intelligent results
|
|
||||||
|
|
||||||
## 📁 File Support
|
|
||||||
|
|
||||||
LEANN understands **30+ file types** including:
|
|
||||||
- **Programming**: Python, JavaScript, TypeScript, Java, Go, Rust, C++, C#
|
|
||||||
- **Data**: SQL, YAML, JSON, CSV, XML
|
|
||||||
- **Documentation**: Markdown, TXT, PDF
|
|
||||||
- **And many more!**
|
|
||||||
|
|
||||||
## 💾 Storage & Organization
|
|
||||||
|
|
||||||
- **Project indexes**: Stored in `.leann/` directory (just like `.git`)
|
|
||||||
- **Global registry**: Project tracking at `~/.leann/projects.json`
|
|
||||||
- **Multi-project support**: Switch between different codebases seamlessly
|
|
||||||
- **Portable**: Transfer indexes between machines with minimal overhead
|
|
||||||
|
|
||||||
## 🗑️ Uninstalling
|
|
||||||
|
|
||||||
To remove the LEANN MCP server from Claude Code:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
claude mcp remove leann-server
|
|
||||||
```
|
|
||||||
To remove LEANN
|
|
||||||
```
|
|
||||||
uv pip uninstall leann leann-backend-hnsw leann-core
|
|
||||||
```
|
|
||||||
@@ -5,8 +5,11 @@ LEANN is a revolutionary vector database that democratizes personal AI. Transfor
|
|||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Default installation (includes both HNSW and DiskANN backends)
|
# Default installation (HNSW backend, recommended)
|
||||||
uv pip install leann
|
uv pip install leann
|
||||||
|
|
||||||
|
# With DiskANN backend (for large-scale deployments)
|
||||||
|
uv pip install leann[diskann]
|
||||||
```
|
```
|
||||||
|
|
||||||
## Quick Start
|
## Quick Start
|
||||||
@@ -16,8 +19,8 @@ from leann import LeannBuilder, LeannSearcher, LeannChat
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
INDEX_PATH = str(Path("./").resolve() / "demo.leann")
|
INDEX_PATH = str(Path("./").resolve() / "demo.leann")
|
||||||
|
|
||||||
# Build an index (choose backend: "hnsw" or "diskann")
|
# Build an index
|
||||||
builder = LeannBuilder(backend_name="hnsw") # or "diskann" for large-scale deployments
|
builder = LeannBuilder(backend_name="hnsw")
|
||||||
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
|
builder.add_text("LEANN saves 97% storage compared to traditional vector databases.")
|
||||||
builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back")
|
builder.add_text("Tung Tung Tung Sahur called—they need their banana‑crocodile hybrid back")
|
||||||
builder.build_index(INDEX_PATH)
|
builder.build_index(INDEX_PATH)
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann"
|
name = "leann"
|
||||||
version = "0.2.7"
|
version = "0.1.16"
|
||||||
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
@@ -24,15 +24,16 @@ classifiers = [
|
|||||||
"Programming Language :: Python :: 3.12",
|
"Programming Language :: Python :: 3.12",
|
||||||
]
|
]
|
||||||
|
|
||||||
# Default installation: core + hnsw + diskann
|
# Default installation: core + hnsw
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"leann-core>=0.1.0",
|
"leann-core>=0.1.0",
|
||||||
"leann-backend-hnsw>=0.1.0",
|
"leann-backend-hnsw>=0.1.0",
|
||||||
"leann-backend-diskann>=0.1.0",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
# All backends now included by default
|
diskann = [
|
||||||
|
"leann-backend-diskann>=0.1.0",
|
||||||
|
]
|
||||||
|
|
||||||
[project.urls]
|
[project.urls]
|
||||||
Repository = "https://github.com/yichuan-w/LEANN"
|
Repository = "https://github.com/yichuan-w/LEANN"
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ dependencies = [
|
|||||||
"pypdfium2>=4.30.0",
|
"pypdfium2>=4.30.0",
|
||||||
# LlamaIndex core and readers - updated versions
|
# LlamaIndex core and readers - updated versions
|
||||||
"llama-index>=0.12.44",
|
"llama-index>=0.12.44",
|
||||||
"llama-index-readers-file>=0.4.0", # Essential for PDF parsing
|
"llama-index-readers-file>=0.4.0", # Essential for PDF parsing
|
||||||
# "llama-index-readers-docling", # Requires Python >= 3.10
|
# "llama-index-readers-docling", # Requires Python >= 3.10
|
||||||
# "llama-index-node-parser-docling", # Requires Python >= 3.10
|
# "llama-index-node-parser-docling", # Requires Python >= 3.10
|
||||||
"llama-index-vector-stores-faiss>=0.4.0",
|
"llama-index-vector-stores-faiss>=0.4.0",
|
||||||
@@ -40,12 +40,9 @@ dependencies = [
|
|||||||
# Other dependencies
|
# Other dependencies
|
||||||
"ipykernel==6.29.5",
|
"ipykernel==6.29.5",
|
||||||
"msgpack>=1.1.1",
|
"msgpack>=1.1.1",
|
||||||
"mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
"mlx>=0.26.3; sys_platform == 'darwin'",
|
||||||
"mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
|
"mlx-lm>=0.26.0; sys_platform == 'darwin'",
|
||||||
"psutil>=5.8.0",
|
"psutil>=5.8.0",
|
||||||
"pathspec>=0.12.1",
|
|
||||||
"nbconvert>=7.16.6",
|
|
||||||
"gitignore-parser>=0.1.12",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
@@ -91,7 +88,7 @@ leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = tr
|
|||||||
leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }
|
leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }
|
||||||
|
|
||||||
[tool.ruff]
|
[tool.ruff]
|
||||||
target-version = "py39"
|
target-version = "py310"
|
||||||
line-length = 100
|
line-length = 100
|
||||||
extend-exclude = [
|
extend-exclude = [
|
||||||
"third_party",
|
"third_party",
|
||||||
|
|||||||
364
uv.lock
generated
364
uv.lock
generated
@@ -294,23 +294,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646 },
|
{ url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "bleach"
|
|
||||||
version = "6.2.0"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "webencodings" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/76/9a/0e33f5054c54d349ea62c277191c020c2d6ef1d65ab2cb1993f91ec846d1/bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f", size = 203083 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/fc/55/96142937f66150805c25c4d0f31ee4132fd33497753400734f9dfdcbdc66/bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e", size = 163406 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[package.optional-dependencies]
|
|
||||||
css = [
|
|
||||||
{ name = "tinycss2" },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "blinker"
|
name = "blinker"
|
||||||
version = "1.9.0"
|
version = "1.9.0"
|
||||||
@@ -1269,15 +1252,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
|
{ url = "https://files.pythonhosted.org/packages/7b/8f/c4d9bafc34ad7ad5d8dc16dd1347ee0e507a52c3adb6bfa8887e1c6a26ba/executing-2.2.0-py2.py3-none-any.whl", hash = "sha256:11387150cad388d62750327a53d3339fad4888b39a6fe233c3afbb54ecffd3aa", size = 26702 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "fastjsonschema"
|
|
||||||
version = "2.21.1"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/8b/50/4b769ce1ac4071a1ef6d86b1a3fb56cdc3a37615e8c5519e1af96cdac366/fastjsonschema-2.21.1.tar.gz", hash = "sha256:794d4f0a58f848961ba16af7b9c85a3e88cd360df008c59aac6fc5ae9323b5d4", size = 373939 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/90/2b/0817a2b257fe88725c25589d89aec060581aabf668707a8d03b2e9e0cb2a/fastjsonschema-2.21.1-py3-none-any.whl", hash = "sha256:c9e5b7e908310918cf494a434eeb31384dd84a98b57a30bcb1f535015b554667", size = 23924 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "filelock"
|
name = "filelock"
|
||||||
version = "3.18.0"
|
version = "3.18.0"
|
||||||
@@ -1504,12 +1478,6 @@ http = [
|
|||||||
{ name = "aiohttp" },
|
{ name = "aiohttp" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "gitignore-parser"
|
|
||||||
version = "0.1.12"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/86/a8/faf07759672973362e3f1f9742281a90aec7846e8a4043c4df5652990054/gitignore_parser-0.1.12.tar.gz", hash = "sha256:78b22243adc0f02102c56c5e8c9a1d9121394142ca6143a90daa7f8d7a07a17e", size = 5407 }
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "greenlet"
|
name = "greenlet"
|
||||||
version = "3.2.3"
|
version = "3.2.3"
|
||||||
@@ -1682,7 +1650,7 @@ name = "importlib-metadata"
|
|||||||
version = "8.7.0"
|
version = "8.7.0"
|
||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "zipp", marker = "python_full_version < '3.10'" },
|
{ name = "zipp" },
|
||||||
]
|
]
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641 }
|
sdist = { url = "https://files.pythonhosted.org/packages/76/66/650a33bd90f786193e4de4b3ad86ea60b53c89b669a5c7be931fac31cdb0/importlib_metadata-8.7.0.tar.gz", hash = "sha256:d13b81ad223b890aa16c5471f2ac3056cf76c5f10f82d6f9292f0b415f389000", size = 56641 }
|
||||||
wheels = [
|
wheels = [
|
||||||
@@ -1959,33 +1927,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746 },
|
{ url = "https://files.pythonhosted.org/packages/7d/4f/1195bbac8e0c2acc5f740661631d8d750dc38d4a32b23ee5df3cde6f4e0d/joblib-1.5.1-py3-none-any.whl", hash = "sha256:4719a31f054c7d766948dcd83e9613686b27114f190f717cec7eaa2084f8a74a", size = 307746 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "jsonschema"
|
|
||||||
version = "4.25.0"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "attrs" },
|
|
||||||
{ name = "jsonschema-specifications" },
|
|
||||||
{ name = "referencing" },
|
|
||||||
{ name = "rpds-py" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/d5/00/a297a868e9d0784450faa7365c2172a7d6110c763e30ba861867c32ae6a9/jsonschema-4.25.0.tar.gz", hash = "sha256:e63acf5c11762c0e6672ffb61482bdf57f0876684d8d249c0fe2d730d48bc55f", size = 356830 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/fe/54/c86cd8e011fe98803d7e382fd67c0df5ceab8d2b7ad8c5a81524f791551c/jsonschema-4.25.0-py3-none-any.whl", hash = "sha256:24c2e8da302de79c8b9382fee3e76b355e44d2a4364bb207159ce10b517bd716", size = 89184 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "jsonschema-specifications"
|
|
||||||
version = "2025.4.1"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "referencing" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/bf/ce/46fbd9c8119cfc3581ee5643ea49464d168028cfb5caff5fc0596d0cf914/jsonschema_specifications-2025.4.1.tar.gz", hash = "sha256:630159c9f4dbea161a6a2205c3011cc4f18ff381b189fff48bb39b9bf26ae608", size = 15513 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/01/0e/b27cdbaccf30b890c40ed1da9fd4a3593a5cf94dae54fb34f8a4b74fcd3f/jsonschema_specifications-2025.4.1-py3-none-any.whl", hash = "sha256:4653bffbd6584f7de83a67e0d620ef16900b390ddc7939d56684d6c81e33f1af", size = 18437 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "jupyter-client"
|
name = "jupyter-client"
|
||||||
version = "8.6.3"
|
version = "8.6.3"
|
||||||
@@ -2017,15 +1958,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/2f/57/6bffd4b20b88da3800c5d691e0337761576ee688eb01299eae865689d2df/jupyter_core-5.8.1-py3-none-any.whl", hash = "sha256:c28d268fc90fb53f1338ded2eb410704c5449a358406e8a948b75706e24863d0", size = 28880 },
|
{ url = "https://files.pythonhosted.org/packages/2f/57/6bffd4b20b88da3800c5d691e0337761576ee688eb01299eae865689d2df/jupyter_core-5.8.1-py3-none-any.whl", hash = "sha256:c28d268fc90fb53f1338ded2eb410704c5449a358406e8a948b75706e24863d0", size = 28880 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "jupyterlab-pygments"
|
|
||||||
version = "0.3.0"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/90/51/9187be60d989df97f5f0aba133fa54e7300f17616e065d1ada7d7646b6d6/jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d", size = 512900 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b1/dd/ead9d8ea85bf202d90cc513b533f9c363121c7792674f78e0d8a854b63b4/jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780", size = 15884 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "kiwisolver"
|
name = "kiwisolver"
|
||||||
version = "1.4.7"
|
version = "1.4.7"
|
||||||
@@ -2223,7 +2155,7 @@ wheels = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "leann-backend-diskann"
|
name = "leann-backend-diskann"
|
||||||
version = "0.2.6"
|
version = "0.1.15"
|
||||||
source = { editable = "packages/leann-backend-diskann" }
|
source = { editable = "packages/leann-backend-diskann" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "leann-core" },
|
{ name = "leann-core" },
|
||||||
@@ -2235,14 +2167,14 @@ dependencies = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "leann-core", specifier = "==0.2.6" },
|
{ name = "leann-core", specifier = "==0.1.15" },
|
||||||
{ name = "numpy" },
|
{ name = "numpy" },
|
||||||
{ name = "protobuf", specifier = ">=3.19.0" },
|
{ name = "protobuf", specifier = ">=3.19.0" },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "leann-backend-hnsw"
|
name = "leann-backend-hnsw"
|
||||||
version = "0.2.6"
|
version = "0.1.15"
|
||||||
source = { editable = "packages/leann-backend-hnsw" }
|
source = { editable = "packages/leann-backend-hnsw" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "leann-core" },
|
{ name = "leann-core" },
|
||||||
@@ -2255,7 +2187,7 @@ dependencies = [
|
|||||||
|
|
||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "leann-core", specifier = "==0.2.6" },
|
{ name = "leann-core", specifier = "==0.1.15" },
|
||||||
{ name = "msgpack", specifier = ">=1.0.0" },
|
{ name = "msgpack", specifier = ">=1.0.0" },
|
||||||
{ name = "numpy" },
|
{ name = "numpy" },
|
||||||
{ name = "pyzmq", specifier = ">=23.0.0" },
|
{ name = "pyzmq", specifier = ">=23.0.0" },
|
||||||
@@ -2263,11 +2195,10 @@ requires-dist = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "leann-core"
|
name = "leann-core"
|
||||||
version = "0.2.6"
|
version = "0.1.15"
|
||||||
source = { editable = "packages/leann-core" }
|
source = { editable = "packages/leann-core" }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "accelerate" },
|
{ name = "accelerate" },
|
||||||
{ name = "gitignore-parser" },
|
|
||||||
{ name = "huggingface-hub" },
|
{ name = "huggingface-hub" },
|
||||||
{ name = "llama-index-core" },
|
{ name = "llama-index-core" },
|
||||||
{ name = "llama-index-embeddings-huggingface" },
|
{ name = "llama-index-embeddings-huggingface" },
|
||||||
@@ -2275,7 +2206,6 @@ dependencies = [
|
|||||||
{ name = "mlx", marker = "sys_platform == 'darwin'" },
|
{ name = "mlx", marker = "sys_platform == 'darwin'" },
|
||||||
{ name = "mlx-lm", marker = "sys_platform == 'darwin'" },
|
{ name = "mlx-lm", marker = "sys_platform == 'darwin'" },
|
||||||
{ name = "msgpack" },
|
{ name = "msgpack" },
|
||||||
{ name = "nbconvert" },
|
|
||||||
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
||||||
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
|
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
|
||||||
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
||||||
@@ -2297,7 +2227,6 @@ dependencies = [
|
|||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "accelerate", specifier = ">=0.20.0" },
|
{ name = "accelerate", specifier = ">=0.20.0" },
|
||||||
{ name = "accelerate", marker = "extra == 'colab'", specifier = ">=0.20.0,<1.0.0" },
|
{ name = "accelerate", marker = "extra == 'colab'", specifier = ">=0.20.0,<1.0.0" },
|
||||||
{ name = "gitignore-parser", specifier = ">=0.1.12" },
|
|
||||||
{ name = "huggingface-hub", specifier = ">=0.20.0" },
|
{ name = "huggingface-hub", specifier = ">=0.20.0" },
|
||||||
{ name = "llama-index-core", specifier = ">=0.12.0" },
|
{ name = "llama-index-core", specifier = ">=0.12.0" },
|
||||||
{ name = "llama-index-embeddings-huggingface", specifier = ">=0.5.5" },
|
{ name = "llama-index-embeddings-huggingface", specifier = ">=0.5.5" },
|
||||||
@@ -2305,7 +2234,6 @@ requires-dist = [
|
|||||||
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.26.3" },
|
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.26.3" },
|
||||||
{ name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = ">=0.26.0" },
|
{ name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = ">=0.26.0" },
|
||||||
{ name = "msgpack", specifier = ">=1.0.0" },
|
{ name = "msgpack", specifier = ">=1.0.0" },
|
||||||
{ name = "nbconvert", specifier = ">=7.0.0" },
|
|
||||||
{ name = "numpy", specifier = ">=1.20.0" },
|
{ name = "numpy", specifier = ">=1.20.0" },
|
||||||
{ name = "openai", specifier = ">=1.0.0" },
|
{ name = "openai", specifier = ">=1.0.0" },
|
||||||
{ name = "pdfplumber", specifier = ">=0.10.0" },
|
{ name = "pdfplumber", specifier = ">=0.10.0" },
|
||||||
@@ -2335,7 +2263,6 @@ dependencies = [
|
|||||||
{ name = "evaluate" },
|
{ name = "evaluate" },
|
||||||
{ name = "flask" },
|
{ name = "flask" },
|
||||||
{ name = "flask-compress" },
|
{ name = "flask-compress" },
|
||||||
{ name = "gitignore-parser" },
|
|
||||||
{ name = "ipykernel" },
|
{ name = "ipykernel" },
|
||||||
{ name = "leann-backend-hnsw" },
|
{ name = "leann-backend-hnsw" },
|
||||||
{ name = "leann-core" },
|
{ name = "leann-core" },
|
||||||
@@ -2346,13 +2273,11 @@ dependencies = [
|
|||||||
{ name = "mlx", marker = "sys_platform == 'darwin'" },
|
{ name = "mlx", marker = "sys_platform == 'darwin'" },
|
||||||
{ name = "mlx-lm", marker = "sys_platform == 'darwin'" },
|
{ name = "mlx-lm", marker = "sys_platform == 'darwin'" },
|
||||||
{ name = "msgpack" },
|
{ name = "msgpack" },
|
||||||
{ name = "nbconvert" },
|
|
||||||
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
{ name = "numpy", version = "2.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
|
||||||
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
|
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version == '3.10.*'" },
|
||||||
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
{ name = "numpy", version = "2.3.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
|
||||||
{ name = "ollama" },
|
{ name = "ollama" },
|
||||||
{ name = "openai" },
|
{ name = "openai" },
|
||||||
{ name = "pathspec" },
|
|
||||||
{ name = "pdfplumber" },
|
{ name = "pdfplumber" },
|
||||||
{ name = "protobuf" },
|
{ name = "protobuf" },
|
||||||
{ name = "psutil" },
|
{ name = "psutil" },
|
||||||
@@ -2411,7 +2336,6 @@ requires-dist = [
|
|||||||
{ name = "evaluate" },
|
{ name = "evaluate" },
|
||||||
{ name = "flask" },
|
{ name = "flask" },
|
||||||
{ name = "flask-compress" },
|
{ name = "flask-compress" },
|
||||||
{ name = "gitignore-parser", specifier = ">=0.1.12" },
|
|
||||||
{ name = "huggingface-hub", marker = "extra == 'dev'", specifier = ">=0.20.0" },
|
{ name = "huggingface-hub", marker = "extra == 'dev'", specifier = ">=0.20.0" },
|
||||||
{ name = "ipykernel", specifier = "==6.29.5" },
|
{ name = "ipykernel", specifier = "==6.29.5" },
|
||||||
{ name = "leann-backend-diskann", marker = "extra == 'diskann'", editable = "packages/leann-backend-diskann" },
|
{ name = "leann-backend-diskann", marker = "extra == 'diskann'", editable = "packages/leann-backend-diskann" },
|
||||||
@@ -2427,13 +2351,11 @@ requires-dist = [
|
|||||||
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.26.3" },
|
{ name = "mlx", marker = "sys_platform == 'darwin'", specifier = ">=0.26.3" },
|
||||||
{ name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = ">=0.26.0" },
|
{ name = "mlx-lm", marker = "sys_platform == 'darwin'", specifier = ">=0.26.0" },
|
||||||
{ name = "msgpack", specifier = ">=1.1.1" },
|
{ name = "msgpack", specifier = ">=1.1.1" },
|
||||||
{ name = "nbconvert", specifier = ">=7.16.6" },
|
|
||||||
{ name = "numpy", specifier = ">=1.26.0" },
|
{ name = "numpy", specifier = ">=1.26.0" },
|
||||||
{ name = "ollama" },
|
{ name = "ollama" },
|
||||||
{ name = "openai", specifier = ">=1.0.0" },
|
{ name = "openai", specifier = ">=1.0.0" },
|
||||||
{ name = "openpyxl", marker = "extra == 'documents'", specifier = ">=3.1.0" },
|
{ name = "openpyxl", marker = "extra == 'documents'", specifier = ">=3.1.0" },
|
||||||
{ name = "pandas", marker = "extra == 'documents'", specifier = ">=2.2.0" },
|
{ name = "pandas", marker = "extra == 'documents'", specifier = ">=2.2.0" },
|
||||||
{ name = "pathspec", specifier = ">=0.12.1" },
|
|
||||||
{ name = "pdfplumber", specifier = ">=0.11.0" },
|
{ name = "pdfplumber", specifier = ">=0.11.0" },
|
||||||
{ name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.5.0" },
|
{ name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.5.0" },
|
||||||
{ name = "protobuf", specifier = "==4.25.3" },
|
{ name = "protobuf", specifier = "==4.25.3" },
|
||||||
@@ -3072,18 +2994,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 },
|
{ url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "mistune"
|
|
||||||
version = "3.1.3"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/c4/79/bda47f7dd7c3c55770478d6d02c9960c430b0cf1773b72366ff89126ea31/mistune-3.1.3.tar.gz", hash = "sha256:a7035c21782b2becb6be62f8f25d3df81ccb4d6fa477a6525b15af06539f02a0", size = 94347 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/01/4d/23c4e4f09da849e127e9f123241946c23c1e30f45a88366879e064211815/mistune-3.1.3-py3-none-any.whl", hash = "sha256:1a32314113cff28aa6432e99e522677c8587fd83e3d51c29b82a52409c842bd9", size = 53410 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "mlx"
|
name = "mlx"
|
||||||
version = "0.27.1"
|
version = "0.27.1"
|
||||||
@@ -3354,62 +3264,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 },
|
{ url = "https://files.pythonhosted.org/packages/79/7b/2c79738432f5c924bef5071f933bcc9efd0473bac3b4aa584a6f7c1c8df8/mypy_extensions-1.1.0-py3-none-any.whl", hash = "sha256:1be4cccdb0f2482337c4743e60421de3a356cd97508abadd57d47403e94f5505", size = 4963 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nbclient"
|
|
||||||
version = "0.10.2"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "jupyter-client" },
|
|
||||||
{ name = "jupyter-core" },
|
|
||||||
{ name = "nbformat" },
|
|
||||||
{ name = "traitlets" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/87/66/7ffd18d58eae90d5721f9f39212327695b749e23ad44b3881744eaf4d9e8/nbclient-0.10.2.tar.gz", hash = "sha256:90b7fc6b810630db87a6d0c2250b1f0ab4cf4d3c27a299b0cde78a4ed3fd9193", size = 62424 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/34/6d/e7fa07f03a4a7b221d94b4d586edb754a9b0dc3c9e2c93353e9fa4e0d117/nbclient-0.10.2-py3-none-any.whl", hash = "sha256:4ffee11e788b4a27fabeb7955547e4318a5298f34342a4bfd01f2e1faaeadc3d", size = 25434 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nbconvert"
|
|
||||||
version = "7.16.6"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "beautifulsoup4" },
|
|
||||||
{ name = "bleach", extra = ["css"] },
|
|
||||||
{ name = "defusedxml" },
|
|
||||||
{ name = "importlib-metadata", marker = "python_full_version < '3.10'" },
|
|
||||||
{ name = "jinja2" },
|
|
||||||
{ name = "jupyter-core" },
|
|
||||||
{ name = "jupyterlab-pygments" },
|
|
||||||
{ name = "markupsafe" },
|
|
||||||
{ name = "mistune" },
|
|
||||||
{ name = "nbclient" },
|
|
||||||
{ name = "nbformat" },
|
|
||||||
{ name = "packaging" },
|
|
||||||
{ name = "pandocfilters" },
|
|
||||||
{ name = "pygments" },
|
|
||||||
{ name = "traitlets" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/a3/59/f28e15fc47ffb73af68a8d9b47367a8630d76e97ae85ad18271b9db96fdf/nbconvert-7.16.6.tar.gz", hash = "sha256:576a7e37c6480da7b8465eefa66c17844243816ce1ccc372633c6b71c3c0f582", size = 857715 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/cc/9a/cd673b2f773a12c992f41309ef81b99da1690426bd2f96957a7ade0d3ed7/nbconvert-7.16.6-py3-none-any.whl", hash = "sha256:1375a7b67e0c2883678c48e506dc320febb57685e5ee67faa51b18a90f3a712b", size = 258525 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "nbformat"
|
|
||||||
version = "5.10.4"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "fastjsonschema" },
|
|
||||||
{ name = "jsonschema" },
|
|
||||||
{ name = "jupyter-core" },
|
|
||||||
{ name = "traitlets" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/6d/fd/91545e604bc3dad7dca9ed03284086039b294c6b3d75c0d2fa45f9e9caf3/nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a", size = 142749 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a9/82/0340caa499416c78e5d8f5f05947ae4bc3cba53c9f038ab6e9ed964e22f1/nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b", size = 78454 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "nest-asyncio"
|
name = "nest-asyncio"
|
||||||
version = "1.6.0"
|
version = "1.6.0"
|
||||||
@@ -3929,15 +3783,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/2f/49/5c30646e96c684570925b772eac4eb0a8cb0ca590fa978f56c5d3ae73ea1/pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e", size = 11618011 },
|
{ url = "https://files.pythonhosted.org/packages/2f/49/5c30646e96c684570925b772eac4eb0a8cb0ca590fa978f56c5d3ae73ea1/pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e", size = 11618011 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "pandocfilters"
|
|
||||||
version = "1.5.1"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/70/6f/3dd4940bbe001c06a65f88e36bad298bc7a0de5036115639926b0c5c0458/pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e", size = 8454 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ef/af/4fbc8cab944db5d21b7e2a5b8e9211a03a79852b1157e2c102fcc61ac440/pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc", size = 8663 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "parso"
|
name = "parso"
|
||||||
version = "0.8.4"
|
version = "0.8.4"
|
||||||
@@ -4925,20 +4770,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/ee/21/c8726b1738d72c7f1602a6720996c4c227754b12335ad84e7db1300f8363/pyzstd-0.17.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a67d7ef18715875b31127eb90075c03ced722fd87902b34bca4b807a2ce1e4d9", size = 241664 },
|
{ url = "https://files.pythonhosted.org/packages/ee/21/c8726b1738d72c7f1602a6720996c4c227754b12335ad84e7db1300f8363/pyzstd-0.17.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a67d7ef18715875b31127eb90075c03ced722fd87902b34bca4b807a2ce1e4d9", size = 241664 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "referencing"
|
|
||||||
version = "0.36.2"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "attrs" },
|
|
||||||
{ name = "rpds-py" },
|
|
||||||
{ name = "typing-extensions", marker = "python_full_version < '3.13'" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/2f/db/98b5c277be99dd18bfd91dd04e1b759cad18d1a338188c936e92f921c7e2/referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa", size = 74744 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c1/b1/3baf80dc6d2b7bc27a95a67752d0208e410351e3feb4eb78de5f77454d8d/referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0", size = 26775 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "2024.11.6"
|
version = "2024.11.6"
|
||||||
@@ -5040,168 +4871,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847 },
|
{ url = "https://files.pythonhosted.org/packages/7c/e4/56027c4a6b4ae70ca9de302488c5ca95ad4a39e190093d6c1a8ace08341b/requests-2.32.4-py3-none-any.whl", hash = "sha256:27babd3cda2a6d50b30443204ee89830707d396671944c998b5975b031ac2b2c", size = 64847 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "rpds-py"
|
|
||||||
version = "0.27.0"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/1e/d9/991a0dee12d9fc53ed027e26a26a64b151d77252ac477e22666b9688bc16/rpds_py-0.27.0.tar.gz", hash = "sha256:8b23cf252f180cda89220b378d917180f29d313cd6a07b2431c0d3b776aae86f", size = 27420 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/75/2d/ad2e37dee3f45580f7fa0066c412a521f9bee53d2718b0e9436d308a1ecd/rpds_py-0.27.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:130c1ffa5039a333f5926b09e346ab335f0d4ec393b030a18549a7c7e7c2cea4", size = 371511 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f5/67/57b4b2479193fde9dd6983a13c2550b5f9c3bcdf8912dffac2068945eb14/rpds_py-0.27.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a4cf32a26fa744101b67bfd28c55d992cd19438aff611a46cac7f066afca8fd4", size = 354718 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a3/be/c2b95ec4b813eb11f3a3c3d22f22bda8d3a48a074a0519cde968c4d102cf/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64a0fe3f334a40b989812de70160de6b0ec7e3c9e4a04c0bbc48d97c5d3600ae", size = 381518 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a5/d2/5a7279bc2b93b20bd50865a2269016238cee45f7dc3cc33402a7f41bd447/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a0ff7ee28583ab30a52f371b40f54e7138c52ca67f8ca17ccb7ccf0b383cb5f", size = 396694 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/65/e9/bac8b3714bd853c5bcb466e04acfb9a5da030d77e0ddf1dfad9afb791c31/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:15ea4d2e182345dd1b4286593601d766411b43f868924afe297570658c31a62b", size = 514813 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1d/aa/293115e956d7d13b7d2a9e9a4121f74989a427aa125f00ce4426ca8b7b28/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:36184b44bf60a480863e51021c26aca3dfe8dd2f5eeabb33622b132b9d8b8b54", size = 402246 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/88/59/2d6789bb898fb3e2f0f7b82b7bcf27f579ebcb6cc36c24f4e208f7f58a5b/rpds_py-0.27.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9b78430703cfcf5f5e86eb74027a1ed03a93509273d7c705babb547f03e60016", size = 383661 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0c/55/add13a593a7a81243a9eed56d618d3d427be5dc1214931676e3f695dfdc1/rpds_py-0.27.0-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:dbd749cff1defbde270ca346b69b3baf5f1297213ef322254bf2a28537f0b046", size = 401691 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/04/09/3e8b2aad494ffaca571e4e19611a12cc18fcfd756d9274f3871a2d822445/rpds_py-0.27.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6bde37765564cd22a676dd8101b657839a1854cfaa9c382c5abf6ff7accfd4ae", size = 416529 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a4/6d/bd899234728f1d8f72c9610f50fdf1c140ecd0a141320e1f1d0f6b20595d/rpds_py-0.27.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:1d66f45b9399036e890fb9c04e9f70c33857fd8f58ac8db9f3278cfa835440c3", size = 558673 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/79/f4/f3e02def5193fb899d797c232f90d6f8f0f2b9eca2faef6f0d34cbc89b2e/rpds_py-0.27.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:d85d784c619370d9329bbd670f41ff5f2ae62ea4519761b679d0f57f0f0ee267", size = 588426 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e3/0c/88e716cd8fd760e5308835fe298255830de4a1c905fd51760b9bb40aa965/rpds_py-0.27.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5df559e9e7644d9042f626f2c3997b555f347d7a855a15f170b253f6c5bfe358", size = 554552 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/2b/a9/0a8243c182e7ac59b901083dff7e671feba6676a131bfff3f8d301cd2b36/rpds_py-0.27.0-cp310-cp310-win32.whl", hash = "sha256:b8a4131698b6992b2a56015f51646711ec5d893a0b314a4b985477868e240c87", size = 218081 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0f/e7/202ff35852312760148be9e08fe2ba6900aa28e7a46940a313eae473c10c/rpds_py-0.27.0-cp310-cp310-win_amd64.whl", hash = "sha256:cbc619e84a5e3ab2d452de831c88bdcad824414e9c2d28cd101f94dbdf26329c", size = 230077 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b4/c1/49d515434c1752e40f5e35b985260cf27af052593378580a2f139a5be6b8/rpds_py-0.27.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:dbc2ab5d10544eb485baa76c63c501303b716a5c405ff2469a1d8ceffaabf622", size = 371577 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e1/6d/bf2715b2fee5087fa13b752b5fd573f1a93e4134c74d275f709e38e54fe7/rpds_py-0.27.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7ec85994f96a58cf7ed288caa344b7fe31fd1d503bdf13d7331ead5f70ab60d5", size = 354959 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a3/5c/e7762808c746dd19733a81373c10da43926f6a6adcf4920a21119697a60a/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:190d7285cd3bb6d31d37a0534d7359c1ee191eb194c511c301f32a4afa5a1dd4", size = 381485 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/40/51/0d308eb0b558309ca0598bcba4243f52c4cd20e15fe991b5bd75824f2e61/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c10d92fb6d7fd827e44055fcd932ad93dac6a11e832d51534d77b97d1d85400f", size = 396816 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/5c/aa/2d585ec911d78f66458b2c91252134ca0c7c70f687a72c87283173dc0c96/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dd2c1d27ebfe6a015cfa2005b7fe8c52d5019f7bbdd801bc6f7499aab9ae739e", size = 514950 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0b/ef/aced551cc1148179557aed84343073adadf252c91265263ee6203458a186/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4790c9d5dd565ddb3e9f656092f57268951398cef52e364c405ed3112dc7c7c1", size = 402132 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/4b/ac/cf644803d8d417653fe2b3604186861d62ea6afaef1b2284045741baef17/rpds_py-0.27.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4300e15e7d03660f04be84a125d1bdd0e6b2f674bc0723bc0fd0122f1a4585dc", size = 383660 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c9/ec/caf47c55ce02b76cbaeeb2d3b36a73da9ca2e14324e3d75cf72b59dcdac5/rpds_py-0.27.0-cp311-cp311-manylinux_2_31_riscv64.whl", hash = "sha256:59195dc244fc183209cf8a93406889cadde47dfd2f0a6b137783aa9c56d67c85", size = 401730 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0b/71/c1f355afdcd5b99ffc253422aa4bdcb04ccf1491dcd1bda3688a0c07fd61/rpds_py-0.27.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fae4a01ef8c4cb2bbe92ef2063149596907dc4a881a8d26743b3f6b304713171", size = 416122 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/38/0f/f4b5b1eda724ed0e04d2b26d8911cdc131451a7ee4c4c020a1387e5c6ded/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e3dc8d4ede2dbae6c0fc2b6c958bf51ce9fd7e9b40c0f5b8835c3fde44f5807d", size = 558771 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/93/c0/5f8b834db2289ab48d5cffbecbb75e35410103a77ac0b8da36bf9544ec1c/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:c3782fb753aa825b4ccabc04292e07897e2fd941448eabf666856c5530277626", size = 587876 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/d2/dd/1a1df02ab8eb970115cff2ae31a6f73916609b900dc86961dc382b8c2e5e/rpds_py-0.27.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:887ab1f12b0d227e9260558a4a2320024b20102207ada65c43e1ffc4546df72e", size = 554359 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a1/e4/95a014ab0d51ab6e3bebbdb476a42d992d2bbf9c489d24cff9fda998e925/rpds_py-0.27.0-cp311-cp311-win32.whl", hash = "sha256:5d6790ff400254137b81b8053b34417e2c46921e302d655181d55ea46df58cf7", size = 218084 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/49/78/f8d5b71ec65a0376b0de31efcbb5528ce17a9b7fdd19c3763303ccfdedec/rpds_py-0.27.0-cp311-cp311-win_amd64.whl", hash = "sha256:e24d8031a2c62f34853756d9208eeafa6b940a1efcbfe36e8f57d99d52bb7261", size = 230085 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e7/d3/84429745184091e06b4cc70f8597408e314c2d2f7f5e13249af9ffab9e3d/rpds_py-0.27.0-cp311-cp311-win_arm64.whl", hash = "sha256:08680820d23df1df0a0260f714d12966bc6c42d02e8055a91d61e03f0c47dda0", size = 222112 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/cd/17/e67309ca1ac993fa1888a0d9b2f5ccc1f67196ace32e76c9f8e1dbbbd50c/rpds_py-0.27.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:19c990fdf5acecbf0623e906ae2e09ce1c58947197f9bced6bbd7482662231c4", size = 362611 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/93/2e/28c2fb84aa7aa5d75933d1862d0f7de6198ea22dfd9a0cca06e8a4e7509e/rpds_py-0.27.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6c27a7054b5224710fcfb1a626ec3ff4f28bcb89b899148c72873b18210e446b", size = 347680 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/44/3e/9834b4c8f4f5fe936b479e623832468aa4bd6beb8d014fecaee9eac6cdb1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09965b314091829b378b60607022048953e25f0b396c2b70e7c4c81bcecf932e", size = 384600 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/19/78/744123c7b38865a965cd9e6f691fde7ef989a00a256fa8bf15b75240d12f/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:14f028eb47f59e9169bfdf9f7ceafd29dd64902141840633683d0bad5b04ff34", size = 400697 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/32/97/3c3d32fe7daee0a1f1a678b6d4dfb8c4dcf88197fa2441f9da7cb54a8466/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6168af0be75bba990a39f9431cdfae5f0ad501f4af32ae62e8856307200517b8", size = 517781 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b2/be/28f0e3e733680aa13ecec1212fc0f585928a206292f14f89c0b8a684cad1/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab47fe727c13c09d0e6f508e3a49e545008e23bf762a245b020391b621f5b726", size = 406449 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/95/ae/5d15c83e337c082d0367053baeb40bfba683f42459f6ebff63a2fd7e5518/rpds_py-0.27.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fa01b3d5e3b7d97efab65bd3d88f164e289ec323a8c033c5c38e53ee25c007e", size = 386150 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/bf/65/944e95f95d5931112829e040912b25a77b2e7ed913ea5fe5746aa5c1ce75/rpds_py-0.27.0-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:6c135708e987f46053e0a1246a206f53717f9fadfba27174a9769ad4befba5c3", size = 406100 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/21/a4/1664b83fae02894533cd11dc0b9f91d673797c2185b7be0f7496107ed6c5/rpds_py-0.27.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fc327f4497b7087d06204235199daf208fd01c82d80465dc5efa4ec9df1c5b4e", size = 421345 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7c/26/b7303941c2b0823bfb34c71378249f8beedce57301f400acb04bb345d025/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7e57906e38583a2cba67046a09c2637e23297618dc1f3caddbc493f2be97c93f", size = 561891 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/9b/c8/48623d64d4a5a028fa99576c768a6159db49ab907230edddc0b8468b998b/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:0f4f69d7a4300fbf91efb1fb4916421bd57804c01ab938ab50ac9c4aa2212f03", size = 591756 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b3/51/18f62617e8e61cc66334c9fb44b1ad7baae3438662098efbc55fb3fda453/rpds_py-0.27.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b4c4fbbcff474e1e5f38be1bf04511c03d492d42eec0babda5d03af3b5589374", size = 557088 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/bd/4c/e84c3a276e2496a93d245516be6b49e20499aa8ca1c94d59fada0d79addc/rpds_py-0.27.0-cp312-cp312-win32.whl", hash = "sha256:27bac29bbbf39601b2aab474daf99dbc8e7176ca3389237a23944b17f8913d97", size = 221926 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/83/89/9d0fbcef64340db0605eb0a0044f258076f3ae0a3b108983b2c614d96212/rpds_py-0.27.0-cp312-cp312-win_amd64.whl", hash = "sha256:8a06aa1197ec0281eb1d7daf6073e199eb832fe591ffa329b88bae28f25f5fe5", size = 233235 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c9/b0/e177aa9f39cbab060f96de4a09df77d494f0279604dc2f509263e21b05f9/rpds_py-0.27.0-cp312-cp312-win_arm64.whl", hash = "sha256:e14aab02258cb776a108107bd15f5b5e4a1bbaa61ef33b36693dfab6f89d54f9", size = 223315 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/81/d2/dfdfd42565a923b9e5a29f93501664f5b984a802967d48d49200ad71be36/rpds_py-0.27.0-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:443d239d02d9ae55b74015234f2cd8eb09e59fbba30bf60baeb3123ad4c6d5ff", size = 362133 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ac/4a/0a2e2460c4b66021d349ce9f6331df1d6c75d7eea90df9785d333a49df04/rpds_py-0.27.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:b8a7acf04fda1f30f1007f3cc96d29d8cf0a53e626e4e1655fdf4eabc082d367", size = 347128 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/35/8d/7d1e4390dfe09d4213b3175a3f5a817514355cb3524593380733204f20b9/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9d0f92b78cfc3b74a42239fdd8c1266f4715b573204c234d2f9fc3fc7a24f185", size = 384027 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c1/65/78499d1a62172891c8cd45de737b2a4b84a414b6ad8315ab3ac4945a5b61/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ce4ed8e0c7dbc5b19352b9c2c6131dd23b95fa8698b5cdd076307a33626b72dc", size = 399973 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/10/a1/1c67c1d8cc889107b19570bb01f75cf49852068e95e6aee80d22915406fc/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fde355b02934cc6b07200cc3b27ab0c15870a757d1a72fd401aa92e2ea3c6bfe", size = 515295 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/df/27/700ec88e748436b6c7c4a2262d66e80f8c21ab585d5e98c45e02f13f21c0/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:13bbc4846ae4c993f07c93feb21a24d8ec637573d567a924b1001e81c8ae80f9", size = 406737 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/33/cc/6b0ee8f0ba3f2df2daac1beda17fde5cf10897a7d466f252bd184ef20162/rpds_py-0.27.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:be0744661afbc4099fef7f4e604e7f1ea1be1dd7284f357924af12a705cc7d5c", size = 385898 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e8/7e/c927b37d7d33c0a0ebf249cc268dc2fcec52864c1b6309ecb960497f2285/rpds_py-0.27.0-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:069e0384a54f427bd65d7fda83b68a90606a3835901aaff42185fcd94f5a9295", size = 405785 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/5b/d2/8ed50746d909dcf402af3fa58b83d5a590ed43e07251d6b08fad1a535ba6/rpds_py-0.27.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4bc262ace5a1a7dc3e2eac2fa97b8257ae795389f688b5adf22c5db1e2431c43", size = 419760 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/d3/60/2b2071aee781cb3bd49f94d5d35686990b925e9b9f3e3d149235a6f5d5c1/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:2fe6e18e5c8581f0361b35ae575043c7029d0a92cb3429e6e596c2cdde251432", size = 561201 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/98/1f/27b67304272521aaea02be293fecedce13fa351a4e41cdb9290576fc6d81/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d93ebdb82363d2e7bec64eecdc3632b59e84bd270d74fe5be1659f7787052f9b", size = 591021 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/db/9b/a2fadf823164dd085b1f894be6443b0762a54a7af6f36e98e8fcda69ee50/rpds_py-0.27.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0954e3a92e1d62e83a54ea7b3fdc9efa5d61acef8488a8a3d31fdafbfb00460d", size = 556368 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/24/f3/6d135d46a129cda2e3e6d4c5e91e2cc26ea0428c6cf152763f3f10b6dd05/rpds_py-0.27.0-cp313-cp313-win32.whl", hash = "sha256:2cff9bdd6c7b906cc562a505c04a57d92e82d37200027e8d362518df427f96cd", size = 221236 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c5/44/65d7494f5448ecc755b545d78b188440f81da98b50ea0447ab5ebfdf9bd6/rpds_py-0.27.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc79d192fb76fc0c84f2c58672c17bbbc383fd26c3cdc29daae16ce3d927e8b2", size = 232634 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/70/d9/23852410fadab2abb611733933401de42a1964ce6600a3badae35fbd573e/rpds_py-0.27.0-cp313-cp313-win_arm64.whl", hash = "sha256:5b3a5c8089eed498a3af23ce87a80805ff98f6ef8f7bdb70bd1b7dae5105f6ac", size = 222783 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/15/75/03447917f78512b34463f4ef11066516067099a0c466545655503bed0c77/rpds_py-0.27.0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:90fb790138c1a89a2e58c9282fe1089638401f2f3b8dddd758499041bc6e0774", size = 359154 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6b/fc/4dac4fa756451f2122ddaf136e2c6aeb758dc6fdbe9ccc4bc95c98451d50/rpds_py-0.27.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:010c4843a3b92b54373e3d2291a7447d6c3fc29f591772cc2ea0e9f5c1da434b", size = 343909 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7b/81/723c1ed8e6f57ed9d8c0c07578747a2d3d554aaefc1ab89f4e42cfeefa07/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9ce7a9e967afc0a2af7caa0d15a3e9c1054815f73d6a8cb9225b61921b419bd", size = 379340 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/98/16/7e3740413de71818ce1997df82ba5f94bae9fff90c0a578c0e24658e6201/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:aa0bf113d15e8abdfee92aa4db86761b709a09954083afcb5bf0f952d6065fdb", size = 391655 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e0/63/2a9f510e124d80660f60ecce07953f3f2d5f0b96192c1365443859b9c87f/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eb91d252b35004a84670dfeafadb042528b19842a0080d8b53e5ec1128e8f433", size = 513017 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/2c/4e/cf6ff311d09776c53ea1b4f2e6700b9d43bb4e99551006817ade4bbd6f78/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:db8a6313dbac934193fc17fe7610f70cd8181c542a91382531bef5ed785e5615", size = 402058 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/88/11/5e36096d474cb10f2a2d68b22af60a3bc4164fd8db15078769a568d9d3ac/rpds_py-0.27.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce96ab0bdfcef1b8c371ada2100767ace6804ea35aacce0aef3aeb4f3f499ca8", size = 383474 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/db/a2/3dff02805b06058760b5eaa6d8cb8db3eb3e46c9e452453ad5fc5b5ad9fe/rpds_py-0.27.0-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:7451ede3560086abe1aa27dcdcf55cd15c96b56f543fb12e5826eee6f721f858", size = 400067 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/67/87/eed7369b0b265518e21ea836456a4ed4a6744c8c12422ce05bce760bb3cf/rpds_py-0.27.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:32196b5a99821476537b3f7732432d64d93a58d680a52c5e12a190ee0135d8b5", size = 412085 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/8b/48/f50b2ab2fbb422fbb389fe296e70b7a6b5ea31b263ada5c61377e710a924/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a029be818059870664157194e46ce0e995082ac49926f1423c1f058534d2aaa9", size = 555928 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/98/41/b18eb51045d06887666c3560cd4bbb6819127b43d758f5adb82b5f56f7d1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:3841f66c1ffdc6cebce8aed64e36db71466f1dc23c0d9a5592e2a782a3042c79", size = 585527 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/be/03/a3dd6470fc76499959b00ae56295b76b4bdf7c6ffc60d62006b1217567e1/rpds_py-0.27.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:42894616da0fc0dcb2ec08a77896c3f56e9cb2f4b66acd76fc8992c3557ceb1c", size = 554211 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/bf/d1/ee5fd1be395a07423ac4ca0bcc05280bf95db2b155d03adefeb47d5ebf7e/rpds_py-0.27.0-cp313-cp313t-win32.whl", hash = "sha256:b1fef1f13c842a39a03409e30ca0bf87b39a1e2a305a9924deadb75a43105d23", size = 216624 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1c/94/4814c4c858833bf46706f87349c37ca45e154da7dbbec9ff09f1abeb08cc/rpds_py-0.27.0-cp313-cp313t-win_amd64.whl", hash = "sha256:183f5e221ba3e283cd36fdfbe311d95cd87699a083330b4f792543987167eff1", size = 230007 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0e/a5/8fffe1c7dc7c055aa02df310f9fb71cfc693a4d5ccc5de2d3456ea5fb022/rpds_py-0.27.0-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:f3cd110e02c5bf17d8fb562f6c9df5c20e73029d587cf8602a2da6c5ef1e32cb", size = 362595 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/bc/c7/4e4253fd2d4bb0edbc0b0b10d9f280612ca4f0f990e3c04c599000fe7d71/rpds_py-0.27.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:8d0e09cf4863c74106b5265c2c310f36146e2b445ff7b3018a56799f28f39f6f", size = 347252 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f3/c8/3d1a954d30f0174dd6baf18b57c215da03cf7846a9d6e0143304e784cddc/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:64f689ab822f9b5eb6dfc69893b4b9366db1d2420f7db1f6a2adf2a9ca15ad64", size = 384886 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e0/52/3c5835f2df389832b28f9276dd5395b5a965cea34226e7c88c8fbec2093c/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e36c80c49853b3ffda7aa1831bf175c13356b210c73128c861f3aa93c3cc4015", size = 399716 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/40/73/176e46992461a1749686a2a441e24df51ff86b99c2d34bf39f2a5273b987/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6de6a7f622860af0146cb9ee148682ff4d0cea0b8fd3ad51ce4d40efb2f061d0", size = 517030 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/79/2a/7266c75840e8c6e70effeb0d38922a45720904f2cd695e68a0150e5407e2/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4045e2fc4b37ec4b48e8907a5819bdd3380708c139d7cc358f03a3653abedb89", size = 408448 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e6/5f/a7efc572b8e235093dc6cf39f4dbc8a7f08e65fdbcec7ff4daeb3585eef1/rpds_py-0.27.0-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9da162b718b12c4219eeeeb68a5b7552fbc7aadedf2efee440f88b9c0e54b45d", size = 387320 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a2/eb/9ff6bc92efe57cf5a2cb74dee20453ba444b6fdc85275d8c99e0d27239d1/rpds_py-0.27.0-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:0665be515767dc727ffa5f74bd2ef60b0ff85dad6bb8f50d91eaa6b5fb226f51", size = 407414 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/fb/bd/3b9b19b00d5c6e1bd0f418c229ab0f8d3b110ddf7ec5d9d689ef783d0268/rpds_py-0.27.0-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:203f581accef67300a942e49a37d74c12ceeef4514874c7cede21b012613ca2c", size = 420766 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/17/6b/521a7b1079ce16258c70805166e3ac6ec4ee2139d023fe07954dc9b2d568/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7873b65686a6471c0037139aa000d23fe94628e0daaa27b6e40607c90e3f5ec4", size = 562409 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/8b/bf/65db5bfb14ccc55e39de8419a659d05a2a9cd232f0a699a516bb0991da7b/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:249ab91ceaa6b41abc5f19513cb95b45c6f956f6b89f1fe3d99c81255a849f9e", size = 590793 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/db/b8/82d368b378325191ba7aae8f40f009b78057b598d4394d1f2cdabaf67b3f/rpds_py-0.27.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d2f184336bc1d6abfaaa1262ed42739c3789b1e3a65a29916a615307d22ffd2e", size = 558178 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f6/ff/f270bddbfbc3812500f8131b1ebbd97afd014cd554b604a3f73f03133a36/rpds_py-0.27.0-cp314-cp314-win32.whl", hash = "sha256:d3c622c39f04d5751408f5b801ecb527e6e0a471b367f420a877f7a660d583f6", size = 222355 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/bf/20/fdab055b1460c02ed356a0e0b0a78c1dd32dc64e82a544f7b31c9ac643dc/rpds_py-0.27.0-cp314-cp314-win_amd64.whl", hash = "sha256:cf824aceaeffff029ccfba0da637d432ca71ab21f13e7f6f5179cd88ebc77a8a", size = 234007 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/4d/a8/694c060005421797a3be4943dab8347c76c2b429a9bef68fb2c87c9e70c7/rpds_py-0.27.0-cp314-cp314-win_arm64.whl", hash = "sha256:86aca1616922b40d8ac1b3073a1ead4255a2f13405e5700c01f7c8d29a03972d", size = 223527 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1e/f9/77f4c90f79d2c5ca8ce6ec6a76cb4734ee247de6b3a4f337e289e1f00372/rpds_py-0.27.0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:341d8acb6724c0c17bdf714319c393bb27f6d23d39bc74f94221b3e59fc31828", size = 359469 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c0/22/b97878d2f1284286fef4172069e84b0b42b546ea7d053e5fb7adb9ac6494/rpds_py-0.27.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6b96b0b784fe5fd03beffff2b1533dc0d85e92bab8d1b2c24ef3a5dc8fac5669", size = 343960 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b1/b0/dfd55b5bb480eda0578ae94ef256d3061d20b19a0f5e18c482f03e65464f/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c431bfb91478d7cbe368d0a699978050d3b112d7f1d440a41e90faa325557fd", size = 380201 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/28/22/e1fa64e50d58ad2b2053077e3ec81a979147c43428de9e6de68ddf6aff4e/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:20e222a44ae9f507d0f2678ee3dd0c45ec1e930f6875d99b8459631c24058aec", size = 392111 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/49/f9/43ab7a43e97aedf6cea6af70fdcbe18abbbc41d4ae6cdec1bfc23bbad403/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:184f0d7b342967f6cda94a07d0e1fae177d11d0b8f17d73e06e36ac02889f303", size = 515863 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/38/9b/9bd59dcc636cd04d86a2d20ad967770bf348f5eb5922a8f29b547c074243/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a00c91104c173c9043bc46f7b30ee5e6d2f6b1149f11f545580f5d6fdff42c0b", size = 402398 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/71/bf/f099328c6c85667aba6b66fa5c35a8882db06dcd462ea214be72813a0dd2/rpds_py-0.27.0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7a37dd208f0d658e0487522078b1ed68cd6bce20ef4b5a915d2809b9094b410", size = 384665 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a9/c5/9c1f03121ece6634818490bd3c8be2c82a70928a19de03467fb25a3ae2a8/rpds_py-0.27.0-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:92f3b3ec3e6008a1fe00b7c0946a170f161ac00645cde35e3c9a68c2475e8156", size = 400405 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b5/b8/e25d54af3e63ac94f0c16d8fe143779fe71ff209445a0c00d0f6984b6b2c/rpds_py-0.27.0-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:a1b3db5fae5cbce2131b7420a3f83553d4d89514c03d67804ced36161fe8b6b2", size = 413179 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f9/d1/406b3316433fe49c3021546293a04bc33f1478e3ec7950215a7fce1a1208/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:5355527adaa713ab693cbce7c1e0ec71682f599f61b128cf19d07e5c13c9b1f1", size = 556895 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/5f/bc/3697c0c21fcb9a54d46ae3b735eb2365eea0c2be076b8f770f98e07998de/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:fcc01c57ce6e70b728af02b2401c5bc853a9e14eb07deda30624374f0aebfe42", size = 585464 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/63/09/ee1bb5536f99f42c839b177d552f6114aa3142d82f49cef49261ed28dbe0/rpds_py-0.27.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3001013dae10f806380ba739d40dee11db1ecb91684febb8406a87c2ded23dae", size = 555090 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7d/2c/363eada9e89f7059199d3724135a86c47082cbf72790d6ba2f336d146ddb/rpds_py-0.27.0-cp314-cp314t-win32.whl", hash = "sha256:0f401c369186a5743694dd9fc08cba66cf70908757552e1f714bfc5219c655b5", size = 218001 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e2/3f/d6c216ed5199c9ef79e2a33955601f454ed1e7420a93b89670133bca5ace/rpds_py-0.27.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8a1dca5507fa1337f75dcd5070218b20bc68cf8844271c923c1b79dfcbc20391", size = 230993 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a3/2e/82fee0cb7142bc32a9ce586eadd24a945257c016902d575bb377ad5feb10/rpds_py-0.27.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:e0d7151a1bd5d0a203a5008fc4ae51a159a610cb82ab0a9b2c4d80241745582e", size = 371495 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f9/b5/b421756c7e5cc1d2bb438a34b16f750363d0d87caf2bfa6f2326423c42e5/rpds_py-0.27.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42ccc57ff99166a55a59d8c7d14f1a357b7749f9ed3584df74053fd098243451", size = 354823 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f9/4a/63337bbabfa38d4094144d0e689758e8452372fd3e45359b806fc1b4c022/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e377e4cf8795cdbdff75b8f0223d7b6c68ff4fef36799d88ccf3a995a91c0112", size = 381538 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/33/8b/14eb61fb9a5bb830d28c548e3e67046fd04cae06c2ce6afe7f30aba7f7f0/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:79af163a4b40bbd8cfd7ca86ec8b54b81121d3b213b4435ea27d6568bcba3e9d", size = 396724 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/03/54/47faf6aa4040443b108b24ae08e9db6fe6daaa8140b696f905833f325293/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b2eff8ee57c5996b0d2a07c3601fb4ce5fbc37547344a26945dd9e5cbd1ed27a", size = 517084 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/0b/88/a78dbacc9a96e3ea7e83d9bed8f272754e618c629ed6a9f8e2a506c84419/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7cf9bc4508efb18d8dff6934b602324eb9f8c6644749627ce001d6f38a490889", size = 402397 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6b/88/268c6422c0c3a0f01bf6e79086f6e4dbc6a2e60a6e95413ad17e3392ec0a/rpds_py-0.27.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05284439ebe7d9f5f5a668d4d8a0a1d851d16f7d47c78e1fab968c8ad30cab04", size = 383570 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/9c/1a/34f5a2459b9752cc08e02c3845c8f570222f7dbd48c7baac4b827701a40e/rpds_py-0.27.0-cp39-cp39-manylinux_2_31_riscv64.whl", hash = "sha256:1321bce595ad70e80f97f998db37356b2e22cf98094eba6fe91782e626da2f71", size = 401771 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/4e/9b/16979115f2ec783ca06454a141a0f32f082763ef874675c5f756e6e76fcd/rpds_py-0.27.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:737005088449ddd3b3df5a95476ee1c2c5c669f5c30eed909548a92939c0e12d", size = 416215 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/81/0b/0305df88fb22db8efe81753ce4ec51b821555448fd94ec77ae4e5dfd57b7/rpds_py-0.27.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:9b2a4e17bfd68536c3b801800941c95a1d4a06e3cada11c146093ba939d9638d", size = 558573 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/84/9a/c48be4da43a556495cf66d6bf71a16e8e3e22ae8e724b678e430521d0702/rpds_py-0.27.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:dc6b0d5a1ea0318ef2def2b6a55dccf1dcaf77d605672347271ed7b829860765", size = 587956 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/76/95/deb1111abde461330c4dad22b14347d064161fb7cb249746a06accc07633/rpds_py-0.27.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4c3f8a0d4802df34fcdbeb3dfe3a4d8c9a530baea8fafdf80816fcaac5379d83", size = 554493 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/cb/16/5342d91917f26da91fc193932d9fbf422e2903aaee9bd3c6ecb4875ef17f/rpds_py-0.27.0-cp39-cp39-win32.whl", hash = "sha256:699c346abc73993962cac7bb4f02f58e438840fa5458a048d3a178a7a670ba86", size = 218302 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/fb/a3/0346108a47efe41b50d8781688b7fb16b18d252053486c932d10b18977c9/rpds_py-0.27.0-cp39-cp39-win_amd64.whl", hash = "sha256:be806e2961cd390a89d6c3ce8c2ae34271cfcd05660f716257838bb560f1c3b6", size = 229977 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/47/55/287068956f9ba1cb40896d291213f09fdd4527630709058b45a592bc09dc/rpds_py-0.27.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:46f48482c1a4748ab2773f75fffbdd1951eb59794e32788834b945da857c47a8", size = 371566 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a2/fb/443af59cbe552e89680bb0f1d1ba47f6387b92083e28a45b8c8863b86c5a/rpds_py-0.27.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:419dd9c98bcc9fb0242be89e0c6e922df333b975d4268faa90d58499fd9c9ebe", size = 355781 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ad/f0/35f48bb073b5ca42b1dcc55cb148f4a3bd4411a3e584f6a18d26f0ea8832/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55d42a0ef2bdf6bc81e1cc2d49d12460f63c6ae1423c4f4851b828e454ccf6f1", size = 382575 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/51/e1/5f5296a21d1189f0f116a938af2e346d83172bf814d373695e54004a936f/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2e39169ac6aae06dd79c07c8a69d9da867cef6a6d7883a0186b46bb46ccfb0c3", size = 397435 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/97/79/3af99b7852b2b55cad8a08863725cbe9dc14781bcf7dc6ecead0c3e1dc54/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:935afcdea4751b0ac918047a2df3f720212892347767aea28f5b3bf7be4f27c0", size = 514861 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/df/3e/11fd6033708ed3ae0e6947bb94f762f56bb46bf59a1b16eef6944e8a62ee/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8de567dec6d451649a781633d36f5c7501711adee329d76c095be2178855b042", size = 402776 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b7/89/f9375ceaa996116de9cbc949874804c7874d42fb258c384c037a46d730b8/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:555ed147cbe8c8f76e72a4c6cd3b7b761cbf9987891b9448808148204aed74a5", size = 384665 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/48/bf/0061e55c6f1f573a63c0f82306b8984ed3b394adafc66854a936d5db3522/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:d2cc2b34f9e1d31ce255174da82902ad75bd7c0d88a33df54a77a22f2ef421ee", size = 402518 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ae/dc/8d506676bfe87b3b683332ec8e6ab2b0be118a3d3595ed021e3274a63191/rpds_py-0.27.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:cb0702c12983be3b2fab98ead349ac63a98216d28dda6f518f52da5498a27a1b", size = 416247 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/2e/02/9a89eea1b75c69e81632de7963076e455b1e00e1cfb46dfdabb055fa03e3/rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:ba783541be46f27c8faea5a6645e193943c17ea2f0ffe593639d906a327a9bcc", size = 559456 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/38/4a/0f3ac4351957847c0d322be6ec72f916e43804a2c1d04e9672ea4a67c315/rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:2406d034635d1497c596c40c85f86ecf2bf9611c1df73d14078af8444fe48031", size = 587778 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c2/8e/39d0d7401095bed5a5ad5ef304fae96383f9bef40ca3f3a0807ff5b68d9d/rpds_py-0.27.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:dea0808153f1fbbad772669d906cddd92100277533a03845de6893cadeffc8be", size = 555247 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e0/04/6b8311e811e620b9eaca67cd80a118ff9159558a719201052a7b2abb88bf/rpds_py-0.27.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d2a81bdcfde4245468f7030a75a37d50400ac2455c3a4819d9d550c937f90ab5", size = 230256 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/59/64/72ab5b911fdcc48058359b0e786e5363e3fde885156116026f1a2ba9a5b5/rpds_py-0.27.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e6491658dd2569f05860bad645569145c8626ac231877b0fb2d5f9bcb7054089", size = 371658 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6c/4b/90ff04b4da055db53d8fea57640d8d5d55456343a1ec9a866c0ecfe10fd1/rpds_py-0.27.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:bec77545d188f8bdd29d42bccb9191682a46fb2e655e3d1fb446d47c55ac3b8d", size = 355529 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a4/be/527491fb1afcd86fc5ce5812eb37bc70428ee017d77fee20de18155c3937/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25a4aebf8ca02bbb90a9b3e7a463bbf3bee02ab1c446840ca07b1695a68ce424", size = 382822 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e0/a5/dcdb8725ce11e6d0913e6fcf782a13f4b8a517e8acc70946031830b98441/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:44524b96481a4c9b8e6c46d6afe43fa1fb485c261e359fbe32b63ff60e3884d8", size = 397233 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/33/f9/0947920d1927e9f144660590cc38cadb0795d78fe0d9aae0ef71c1513b7c/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:45d04a73c54b6a5fd2bab91a4b5bc8b426949586e61340e212a8484919183859", size = 514892 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1d/ed/d1343398c1417c68f8daa1afce56ef6ce5cc587daaf98e29347b00a80ff2/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:343cf24de9ed6c728abefc5d5c851d5de06497caa7ac37e5e65dd572921ed1b5", size = 402733 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1d/0b/646f55442cd14014fb64d143428f25667a100f82092c90087b9ea7101c74/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7aed8118ae20515974650d08eb724150dc2e20c2814bcc307089569995e88a14", size = 384447 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/4b/15/0596ef7529828e33a6c81ecf5013d1dd33a511a3e0be0561f83079cda227/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:af9d4fd79ee1cc8e7caf693ee02737daabfc0fcf2773ca0a4735b356c8ad6f7c", size = 402502 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/c3/8d/986af3c42f8454a6cafff8729d99fb178ae9b08a9816325ac7a8fa57c0c0/rpds_py-0.27.0-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f0396e894bd1e66c74ecbc08b4f6a03dc331140942c4b1d345dd131b68574a60", size = 416651 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e9/9a/b4ec3629b7b447e896eec574469159b5b60b7781d3711c914748bf32de05/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:59714ab0a5af25d723d8e9816638faf7f4254234decb7d212715c1aa71eee7be", size = 559460 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/61/63/d1e127b40c3e4733b3a6f26ae7a063cdf2bc1caa5272c89075425c7d397a/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_i686.whl", hash = "sha256:88051c3b7d5325409f433c5a40328fcb0685fc04e5db49ff936e910901d10114", size = 588072 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/04/7e/8ffc71a8f6833d9c9fb999f5b0ee736b8b159fd66968e05c7afc2dbcd57e/rpds_py-0.27.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:181bc29e59e5e5e6e9d63b143ff4d5191224d355e246b5a48c88ce6b35c4e466", size = 555083 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a8/fc/ef6386838e0e91d6ba79b741ccce6ca987e89619aa86f418fecf381eba23/rpds_py-0.27.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:9ad08547995a57e74fea6abaf5940d399447935faebbd2612b3b0ca6f987946b", size = 371849 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/2c/f8/f30394aff811bc0f13fab8d8e4b9f880fcb678234eb0af7d2c4b6232f44f/rpds_py-0.27.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:61490d57e82e23b45c66f96184237994bfafa914433b8cd1a9bb57fecfced59d", size = 356437 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/87/56/ed704fc668c9abc56d3686b723e4d6f2585597daf4b68b654ade7c97930d/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7cf5e726b6fa977e428a61880fb108a62f28b6d0c7ef675b117eaff7076df49", size = 382247 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/48/55/6ef2c9b7caae3c1c360d9556a70979e16f21bfb1e94f50f481d224f3b8aa/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:dc662bc9375a6a394b62dfd331874c434819f10ee3902123200dbcf116963f89", size = 397223 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/63/04/8fc2059411daaca733155fc2613cc91dc728d7abe31fd0c0fa4c7ec5ff1a/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:299a245537e697f28a7511d01038c310ac74e8ea213c0019e1fc65f52c0dcb23", size = 516308 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/a4/d0/b79d3fe07c47bfa989139e692f85371f5a0e1376696b173dabe7ac77b7d1/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:be3964f7312ea05ed283b20f87cb533fdc555b2e428cc7be64612c0b2124f08c", size = 401967 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/cd/b1/55014f6da5ec8029d1d7d7d2a884b9d7ad7f217e05bb9cb782f06d8209c4/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:33ba649a6e55ae3808e4c39e01580dc9a9b0d5b02e77b66bb86ef117922b1264", size = 384584 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/86/34/5c5c1a8550ac172dd6cd53925c321363d94b2a1f0b3173743dbbfd87b8ec/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_31_riscv64.whl", hash = "sha256:81f81bbd7cdb4bdc418c09a73809abeda8f263a6bf8f9c7f93ed98b5597af39d", size = 401879 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/35/07/009bbc659388c4c5a256f05f56df207633cda2f5d61a8d54c50c427e435e/rpds_py-0.27.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:11e8e28c0ba0373d052818b600474cfee2fafa6c9f36c8587d217b13ee28ca7d", size = 416908 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/7a/cc/8949c13dc5a05d955cb88909bfac4004805974dec7b0d02543de55e43272/rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:e3acb9c16530362aeaef4e84d57db357002dc5cbfac9a23414c3e73c08301ab2", size = 559105 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/ea/40/574da2033b01d6e2e7fa3b021993321565c6634f9d0021707d210ce35b58/rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:2e307cb5f66c59ede95c00e93cd84190a5b7f3533d7953690b2036780622ba81", size = 588335 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1d/83/72ed1ce357d8c63bde0bba2458a502e7cc4e150e272139161e1d205a9d67/rpds_py-0.27.0-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:f09c9d4c26fa79c1bad927efb05aca2391350b8e61c38cbc0d7d3c814e463124", size = 555094 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6f/15/fc639de53b3798340233f37959d252311b30d1834b65a02741e3373407fa/rpds_py-0.27.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:af22763a0a1eff106426a6e1f13c4582e0d0ad89c1493ab6c058236174cd6c6a", size = 230031 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ruff"
|
name = "ruff"
|
||||||
version = "0.12.5"
|
version = "0.12.5"
|
||||||
@@ -5839,18 +5508,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/70/22/e8fc1bf9cdecc439b7ddc28a45b976a8c699a38874c070749d855696368a/tiktoken-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:26242ca9dc8b58e875ff4ca078b9a94d2f0813e6a535dcd2205df5d49d927cc7", size = 894215 },
|
{ url = "https://files.pythonhosted.org/packages/70/22/e8fc1bf9cdecc439b7ddc28a45b976a8c699a38874c070749d855696368a/tiktoken-0.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:26242ca9dc8b58e875ff4ca078b9a94d2f0813e6a535dcd2205df5d49d927cc7", size = 894215 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "tinycss2"
|
|
||||||
version = "1.4.0"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
dependencies = [
|
|
||||||
{ name = "webencodings" },
|
|
||||||
]
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/7a/fd/7a5ee21fd08ff70d3d33a5781c255cbe779659bd03278feb98b19ee550f4/tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7", size = 87085 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/e6/34/ebdc18bae6aa14fbee1a08b63c015c72b64868ff7dae68808ab500c492e2/tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289", size = 26610 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokenizers"
|
name = "tokenizers"
|
||||||
version = "0.21.4"
|
version = "0.21.4"
|
||||||
@@ -6143,15 +5800,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
|
{ url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "webencodings"
|
|
||||||
version = "0.5.1"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/0b/02/ae6ceac1baeda530866a85075641cec12989bd8d31af6d5ab4a3e8c92f47/webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923", size = 9721 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", size = 11774 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "werkzeug"
|
name = "werkzeug"
|
||||||
version = "3.1.3"
|
version = "3.1.3"
|
||||||
|
|||||||
Reference in New Issue
Block a user