Compare commits

..

1 Commits

Author SHA1 Message Date
Andy Lee
d2299d2679 feat: Enhance CLI with improved list and smart remove commands
- **Better UX**: Current project shown first with clear separation
- **Visual improvements**: Icons (🏠/📂), better formatting, size info
- **Smart guidance**: Context-aware usage examples and getting started tips

- **Safety first**: Always shows ALL matching indexes across projects
- **Intelligent handling**:
  - Single match: Clear location display with cross-project warnings
  - Multiple matches: Interactive selection with final confirmation
- **Prevents accidents**: No more deleting wrong indexes due to name conflicts
- **User-friendly**: 'c' to cancel, clear visual hierarchy, detailed info

- **Clean logging**: Hide debug messages for better CLI experience
- **Comprehensive search**: Always scan all projects for transparency
- **Error handling**: Graceful handling of edge cases and user input

- **Safer**: Eliminates risk of accidental index deletion
- **Clearer**: Users always know what they're operating on
- **Smarter**: Automatic detection and handling of common scenarios
2025-08-15 23:12:21 -07:00
21 changed files with 3931 additions and 4157 deletions

1
.gitattributes vendored Normal file
View File

@@ -0,0 +1 @@
paper_plot/data/big_graph_degree_data.npz filter=lfs diff=lfs merge=lfs -text

View File

@@ -87,7 +87,7 @@ jobs:
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v4
with: with:
ref: ${{ inputs.ref }} ref: ${{ inputs.ref }}
submodules: recursive submodules: recursive
@@ -98,23 +98,21 @@ jobs:
python-version: ${{ matrix.python }} python-version: ${{ matrix.python }}
- name: Install uv - name: Install uv
uses: astral-sh/setup-uv@v6 uses: astral-sh/setup-uv@v4
- name: Install system dependencies (Ubuntu) - name: Install system dependencies (Ubuntu)
if: runner.os == 'Linux' if: runner.os == 'Linux'
run: | run: |
sudo apt-get update sudo apt-get update
sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \ sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \
pkg-config libabsl-dev libaio-dev libprotobuf-dev \ pkg-config libopenblas-dev patchelf libabsl-dev libaio-dev libprotobuf-dev
patchelf
# Install Intel MKL for DiskANN # Install Intel MKL for DiskANN
wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
source /opt/intel/oneapi/setvars.sh source /opt/intel/oneapi/setvars.sh
echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV
echo "LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/latest/linux/compiler/lib/intel64_lin" >> $GITHUB_ENV echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/mkl/latest/lib/intel64" >> $GITHUB_ENV
- name: Install system dependencies (macOS) - name: Install system dependencies (macOS)
if: runner.os == 'macOS' if: runner.os == 'macOS'
@@ -306,53 +304,3 @@ jobs:
with: with:
name: packages-${{ matrix.os }}-py${{ matrix.python }} name: packages-${{ matrix.os }}-py${{ matrix.python }}
path: packages/*/dist/ path: packages/*/dist/
arch-smoke:
name: Arch Linux smoke test (install & import)
needs: build
runs-on: ubuntu-latest
container:
image: archlinux:latest
steps:
- name: Prepare system
run: |
pacman -Syu --noconfirm
pacman -S --noconfirm python python-pip gcc git zlib openssl
- name: Download ALL wheel artifacts from this run
uses: actions/download-artifact@v5
with:
# Don't specify name, download all artifacts
path: ./wheels
- name: Install uv
uses: astral-sh/setup-uv@v6
- name: Create virtual environment and install wheels
run: |
uv venv
source .venv/bin/activate || source .venv/Scripts/activate
uv pip install --find-links wheels leann-core
uv pip install --find-links wheels leann-backend-hnsw
uv pip install --find-links wheels leann-backend-diskann
uv pip install --find-links wheels leann
- name: Import & tiny runtime check
env:
OMP_NUM_THREADS: 1
MKL_NUM_THREADS: 1
run: |
source .venv/bin/activate || source .venv/Scripts/activate
python - <<'PY'
import leann
import leann_backend_hnsw as h
import leann_backend_diskann as d
from leann import LeannBuilder, LeannSearcher
b = LeannBuilder(backend_name="hnsw")
b.add_text("hello arch")
b.build_index("arch_demo.leann")
s = LeannSearcher("arch_demo.leann")
print("search:", s.search("hello", top_k=1))
PY

View File

@@ -14,6 +14,6 @@ jobs:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
- uses: lycheeverse/lychee-action@v2 - uses: lycheeverse/lychee-action@v2
with: with:
args: --no-progress --insecure --user-agent 'curl/7.68.0' README.md docs/ apps/ examples/ benchmarks/ args: --no-progress --insecure README.md docs/ apps/ examples/ benchmarks/
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

1
.gitignore vendored
View File

@@ -18,7 +18,6 @@ demo/experiment_results/**/*.json
*.eml *.eml
*.emlx *.emlx
*.json *.json
!.vscode/*.json
*.sh *.sh
*.txt *.txt
!CMakeLists.txt !CMakeLists.txt

View File

@@ -1,5 +0,0 @@
{
"recommendations": [
"charliermarsh.ruff",
]
}

22
.vscode/settings.json vendored
View File

@@ -1,22 +0,0 @@
{
"python.defaultInterpreterPath": ".venv/bin/python",
"python.terminal.activateEnvironment": true,
"[python]": {
"editor.defaultFormatter": "charliermarsh.ruff",
"editor.formatOnSave": true,
"editor.codeActionsOnSave": {
"source.organizeImports": "explicit",
"source.fixAll": "explicit"
},
"editor.insertSpaces": true,
"editor.tabSize": 4
},
"ruff.enable": true,
"files.watcherExclude": {
"**/.venv/**": true,
"**/__pycache__/**": true,
"**/*.egg-info/**": true,
"**/build/**": true,
"**/dist/**": true
}
}

View File

@@ -87,60 +87,17 @@ git submodule update --init --recursive
``` ```
**macOS:** **macOS:**
Note: DiskANN requires MacOS 13.3 or later.
```bash ```bash
brew install libomp boost protobuf zeromq pkgconf brew install llvm libomp boost protobuf zeromq pkgconf
uv sync --extra diskann CC=$(brew --prefix llvm)/bin/clang CXX=$(brew --prefix llvm)/bin/clang++ uv sync
``` ```
**Linux (Ubuntu/Debian):** **Linux:**
Note: On Ubuntu 20.04, you may need to build a newer Abseil and pin Protobuf (e.g., v3.20.x) for building DiskANN. See [Issue #30](https://github.com/yichuan-w/LEANN/issues/30) for a step-by-step note.
You can manually install [Intel oneAPI MKL](https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html) instead of `libmkl-full-dev` for DiskANN. You can also use `libopenblas-dev` for building HNSW only, by removing `--extra diskann` in the command below.
```bash ```bash
sudo apt-get update && sudo apt-get install -y \ # Ubuntu/Debian (For Arch Linux: sudo pacman -S blas lapack openblas libaio boost protobuf abseil-cpp zeromq)
libomp-dev libboost-all-dev protobuf-compiler libzmq3-dev \ sudo apt-get update && sudo apt-get install -y libomp-dev libboost-all-dev protobuf-compiler libabsl-dev libmkl-full-dev libaio-dev libzmq3-dev
pkg-config libabsl-dev libaio-dev libprotobuf-dev \
libmkl-full-dev
uv sync --extra diskann uv sync
```
**Linux (Arch Linux):**
```bash
sudo pacman -Syu && sudo pacman -S --needed base-devel cmake pkgconf git gcc \
boost boost-libs protobuf abseil-cpp libaio zeromq
# For MKL in DiskANN
sudo pacman -S --needed base-devel git
git clone https://aur.archlinux.org/paru-bin.git
cd paru-bin && makepkg -si
paru -S intel-oneapi-mkl intel-oneapi-compiler
source /opt/intel/oneapi/setvars.sh
uv sync --extra diskann
```
**Linux (RHEL / CentOS Stream / Oracle / Rocky / AlmaLinux):**
See [Issue #50](https://github.com/yichuan-w/LEANN/issues/50) for more details.
```bash
sudo dnf groupinstall -y "Development Tools"
sudo dnf install -y libomp-devel boost-devel protobuf-compiler protobuf-devel \
abseil-cpp-devel libaio-devel zeromq-devel pkgconf-pkg-config
# For MKL in DiskANN
sudo dnf install -y intel-oneapi-mkl intel-oneapi-mkl-devel \
intel-oneapi-openmp || sudo dnf install -y intel-oneapi-compiler
source /opt/intel/oneapi/setvars.sh
uv sync --extra diskann
``` ```
</details> </details>
@@ -585,10 +542,8 @@ Options:
leann list leann list
# Lists all indexes across all projects with status indicators: # Lists all indexes across all projects with status indicators:
# - Index is complete and ready to use # - Index is complete and ready to use
# - Index is incomplete or corrupted # - Index is incomplete or corrupted
# 📁 - CLI-created index (in .leann/indexes/)
# 📄 - App-created index (*.leann.meta.json files)
``` ```
**Remove Command:** **Remove Command:**
@@ -602,7 +557,6 @@ Options:
# - Shows all matching indexes across projects # - Shows all matching indexes across projects
# - Requires confirmation for cross-project removal # - Requires confirmation for cross-project removal
# - Interactive selection when multiple matches found # - Interactive selection when multiple matches found
# - Supports both CLI and app-created indexes
``` ```
</details> </details>

View File

@@ -10,7 +10,6 @@ from typing import Any
import dotenv import dotenv
from leann.api import LeannBuilder, LeannChat from leann.api import LeannBuilder, LeannChat
from leann.registry import register_project_directory
from llama_index.core.node_parser import SentenceSplitter from llama_index.core.node_parser import SentenceSplitter
dotenv.load_dotenv() dotenv.load_dotenv()
@@ -215,11 +214,6 @@ class BaseRAGExample(ABC):
builder.build_index(index_path) builder.build_index(index_path)
print(f"Index saved to: {index_path}") print(f"Index saved to: {index_path}")
# Register project directory so leann list can discover this index
# The index is saved as args.index_dir/index_name.leann
# We want to register the current working directory where the app is run
register_project_directory(Path.cwd())
return index_path return index_path
async def run_interactive_chat(self, args, index_path: str): async def run_interactive_chat(self, args, index_path: str):

View File

@@ -183,9 +183,6 @@ class Benchmark:
start_time = time.time() start_time = time.time()
with torch.no_grad(): with torch.no_grad():
self.model(input_ids=input_ids, attention_mask=attention_mask) self.model(input_ids=input_ids, attention_mask=attention_mask)
# mps sync
if torch.backends.mps.is_available():
torch.mps.synchronize()
end_time = time.time() end_time = time.time()
return end_time - start_time return end_time - start_time

View File

@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
[project] [project]
name = "leann-backend-diskann" name = "leann-backend-diskann"
version = "0.3.1" version = "0.3.0"
dependencies = ["leann-core==0.3.1", "numpy", "protobuf>=3.19.0"] dependencies = ["leann-core==0.3.0", "numpy", "protobuf>=3.19.0"]
[tool.scikit-build] [tool.scikit-build]
# Key: simplified CMake path # Key: simplified CMake path

View File

@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
[project] [project]
name = "leann-backend-hnsw" name = "leann-backend-hnsw"
version = "0.3.1" version = "0.3.0"
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit." description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
dependencies = [ dependencies = [
"leann-core==0.3.1", "leann-core==0.3.0",
"numpy", "numpy",
"pyzmq>=23.0.0", "pyzmq>=23.0.0",
"msgpack>=1.0.0", "msgpack>=1.0.0",

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "leann-core" name = "leann-core"
version = "0.3.1" version = "0.3.0"
description = "Core API and plugin system for LEANN" description = "Core API and plugin system for LEANN"
readme = "README.md" readme = "README.md"
requires-python = ">=3.9" requires-python = ">=3.9"

View File

@@ -614,7 +614,7 @@ class LeannSearcher:
zmq_port=zmq_port, zmq_port=zmq_port,
) )
# logger.info(f" Generated embedding shape: {query_embedding.shape}") # logger.info(f" Generated embedding shape: {query_embedding.shape}")
# time.time() - start_time time.time() - start_time
# logger.info(f" Embedding time: {embedding_time} seconds") # logger.info(f" Embedding time: {embedding_time} seconds")
start_time = time.time() start_time = time.time()
@@ -680,9 +680,8 @@ class LeannSearcher:
This method should be called after you're done using the searcher, This method should be called after you're done using the searcher,
especially in test environments or batch processing scenarios. especially in test environments or batch processing scenarios.
""" """
backend = getattr(self.backend_impl, "embedding_server_manager", None) if hasattr(self.backend_impl, "embedding_server_manager"):
if backend is not None: self.backend_impl.embedding_server_manager.stop_server()
backend.stop_server()
# Enable automatic cleanup patterns # Enable automatic cleanup patterns
def __enter__(self): def __enter__(self):

View File

@@ -707,28 +707,20 @@ class GeminiChat(LLMInterface):
logger.info(f"Sending request to Gemini with model {self.model}") logger.info(f"Sending request to Gemini with model {self.model}")
try: try:
from google.genai.types import GenerateContentConfig # Set generation configuration
generation_config = {
generation_config = GenerateContentConfig( "temperature": kwargs.get("temperature", 0.7),
temperature=kwargs.get("temperature", 0.7), "max_output_tokens": kwargs.get("max_tokens", 1000),
max_output_tokens=kwargs.get("max_tokens", 1000), }
)
# Handle top_p parameter # Handle top_p parameter
if "top_p" in kwargs: if "top_p" in kwargs:
generation_config.top_p = kwargs["top_p"] generation_config["top_p"] = kwargs["top_p"]
response = self.client.models.generate_content( response = self.client.models.generate_content(
model=self.model, model=self.model, contents=prompt, config=generation_config
contents=prompt,
config=generation_config,
) )
# Handle potential None response text return response.text.strip()
response_text = response.text
if response_text is None:
logger.warning("Gemini returned None response text")
return ""
return response_text.strip()
except Exception as e: except Exception as e:
logger.error(f"Error communicating with Gemini: {e}") logger.error(f"Error communicating with Gemini: {e}")
return f"Error: Could not get a response from Gemini. Details: {e}" return f"Error: Could not get a response from Gemini. Details: {e}"

View File

@@ -1,14 +1,13 @@
import argparse import argparse
import asyncio import asyncio
from pathlib import Path from pathlib import Path
from typing import Optional, Union from typing import Union
from llama_index.core import SimpleDirectoryReader from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter from llama_index.core.node_parser import SentenceSplitter
from tqdm import tqdm from tqdm import tqdm
from .api import LeannBuilder, LeannChat, LeannSearcher from .api import LeannBuilder, LeannChat, LeannSearcher
from .registry import register_project_directory
def extract_pdf_text_with_pymupdf(file_path: str) -> str: def extract_pdf_text_with_pymupdf(file_path: str) -> str:
@@ -206,11 +205,6 @@ Examples:
default="global", default="global",
help="Pruning strategy (default: global)", help="Pruning strategy (default: global)",
) )
search_parser.add_argument(
"--non-interactive",
action="store_true",
help="Non-interactive mode: automatically select index without prompting",
)
# Ask command # Ask command
ask_parser = subparsers.add_parser("ask", help="Ask questions") ask_parser = subparsers.add_parser("ask", help="Ask questions")
@@ -269,7 +263,31 @@ Examples:
def register_project_dir(self): def register_project_dir(self):
"""Register current project directory in global registry""" """Register current project directory in global registry"""
register_project_directory() global_registry = Path.home() / ".leann" / "projects.json"
global_registry.parent.mkdir(exist_ok=True)
current_dir = str(Path.cwd())
# Load existing registry
projects = []
if global_registry.exists():
try:
import json
with open(global_registry) as f:
projects = json.load(f)
except Exception:
projects = []
# Add current directory if not already present
if current_dir not in projects:
projects.append(current_dir)
# Save registry
import json
with open(global_registry, "w") as f:
json.dump(projects, f, indent=2)
def _build_gitignore_parser(self, docs_dir: str): def _build_gitignore_parser(self, docs_dir: str):
"""Build gitignore parser using gitignore-parser library.""" """Build gitignore parser using gitignore-parser library."""
@@ -355,10 +373,13 @@ Examples:
valid_projects.append(current_path) valid_projects.append(current_path)
# Separate current and other projects # Separate current and other projects
current_project = None
other_projects = [] other_projects = []
for project_path in valid_projects: for project_path in valid_projects:
if project_path != current_path: if project_path == current_path:
current_project = project_path
else:
other_projects.append(project_path) other_projects.append(project_path)
print("📚 LEANN Indexes") print("📚 LEANN Indexes")
@@ -368,20 +389,35 @@ Examples:
current_indexes_count = 0 current_indexes_count = 0
# Show current project first (most important) # Show current project first (most important)
print("\n🏠 Current Project") if current_project:
print(f" {current_path}") current_indexes_dir = current_project / ".leann" / "indexes"
print(" " + "" * 45) if current_indexes_dir.exists():
current_index_dirs = [d for d in current_indexes_dir.iterdir() if d.is_dir()]
current_indexes = self._discover_indexes_in_project(current_path) print("\n🏠 Current Project")
if current_indexes: print(f" {current_project}")
for idx in current_indexes: print(" " + "" * 45)
total_indexes += 1
current_indexes_count += 1 if current_index_dirs:
type_icon = "📁" if idx["type"] == "cli" else "📄" for index_dir in current_index_dirs:
print(f" {current_indexes_count}. {type_icon} {idx['name']} {idx['status']}") total_indexes += 1
if idx["size_mb"] > 0: current_indexes_count += 1
print(f" 📦 Size: {idx['size_mb']:.1f} MB") index_name = index_dir.name
meta_file = index_dir / "documents.leann.meta.json"
status = "" if meta_file.exists() else ""
print(f" {current_indexes_count}. {index_name} {status}")
if meta_file.exists():
size_mb = sum(
f.stat().st_size for f in index_dir.iterdir() if f.is_file()
) / (1024 * 1024)
print(f" 📦 Size: {size_mb:.1f} MB")
else:
print(" 📭 No indexes in current project")
else: else:
print("\n🏠 Current Project")
print(f" {current_path}")
print(" " + "" * 45)
print(" 📭 No indexes in current project") print(" 📭 No indexes in current project")
# Show other projects (reference information) # Show other projects (reference information)
@@ -390,19 +426,29 @@ Examples:
print(" " + "" * 45) print(" " + "" * 45)
for project_path in other_projects: for project_path in other_projects:
project_indexes = self._discover_indexes_in_project(project_path) indexes_dir = project_path / ".leann" / "indexes"
if not project_indexes: if not indexes_dir.exists():
continue
index_dirs = [d for d in indexes_dir.iterdir() if d.is_dir()]
if not index_dirs:
continue continue
print(f"\n 📂 {project_path.name}") print(f"\n 📂 {project_path.name}")
print(f" {project_path}") print(f" {project_path}")
for idx in project_indexes: for index_dir in index_dirs:
total_indexes += 1 total_indexes += 1
type_icon = "📁" if idx["type"] == "cli" else "📄" index_name = index_dir.name
print(f"{type_icon} {idx['name']} {idx['status']}") meta_file = index_dir / "documents.leann.meta.json"
if idx["size_mb"] > 0: status = "" if meta_file.exists() else ""
print(f" 📦 {idx['size_mb']:.1f} MB")
print(f"{index_name} {status}")
if meta_file.exists():
size_mb = sum(
f.stat().st_size for f in index_dir.iterdir() if f.is_file()
) / (1024 * 1024)
print(f" 📦 {size_mb:.1f} MB")
# Summary and usage info # Summary and usage info
print("\n" + "=" * 50) print("\n" + "=" * 50)
@@ -410,9 +456,13 @@ Examples:
print("💡 Get started:") print("💡 Get started:")
print(" leann build my-docs --docs ./documents") print(" leann build my-docs --docs ./documents")
else: else:
# Count only projects that have at least one discoverable index projects_count = len(
projects_count = sum( [
1 for p in valid_projects if len(self._discover_indexes_in_project(p)) > 0 p
for p in valid_projects
if (p / ".leann" / "indexes").exists()
and list((p / ".leann" / "indexes").iterdir())
]
) )
print(f"📊 Total: {total_indexes} indexes across {projects_count} projects") print(f"📊 Total: {total_indexes} indexes across {projects_count} projects")
@@ -430,76 +480,6 @@ Examples:
print("\n💡 Create your first index:") print("\n💡 Create your first index:")
print(" leann build my-docs --docs ./documents") print(" leann build my-docs --docs ./documents")
def _discover_indexes_in_project(self, project_path: Path):
"""Discover all indexes in a project directory (both CLI and apps formats)"""
indexes = []
# 1. CLI format: .leann/indexes/index_name/
cli_indexes_dir = project_path / ".leann" / "indexes"
if cli_indexes_dir.exists():
for index_dir in cli_indexes_dir.iterdir():
if index_dir.is_dir():
meta_file = index_dir / "documents.leann.meta.json"
status = "" if meta_file.exists() else ""
size_mb = 0
if meta_file.exists():
try:
size_mb = sum(
f.stat().st_size for f in index_dir.iterdir() if f.is_file()
) / (1024 * 1024)
except (OSError, PermissionError):
pass
indexes.append(
{
"name": index_dir.name,
"type": "cli",
"status": status,
"size_mb": size_mb,
"path": index_dir,
}
)
# 2. Apps format: *.leann.meta.json files anywhere in the project
cli_indexes_dir = project_path / ".leann" / "indexes"
for meta_file in project_path.rglob("*.leann.meta.json"):
if meta_file.is_file():
# Skip CLI-built indexes (which store meta under .leann/indexes/<name>/)
try:
if cli_indexes_dir.exists() and cli_indexes_dir in meta_file.parents:
continue
except Exception:
pass
# Use the parent directory name as the app index display name
display_name = meta_file.parent.name
# Extract file base used to store files
file_base = meta_file.name.replace(".leann.meta.json", "")
# Apps indexes are considered complete if the .leann.meta.json file exists
status = ""
# Calculate total size of all related files (use file base)
size_mb = 0
try:
index_dir = meta_file.parent
for related_file in index_dir.glob(f"{file_base}.leann*"):
size_mb += related_file.stat().st_size / (1024 * 1024)
except (OSError, PermissionError):
pass
indexes.append(
{
"name": display_name,
"type": "app",
"status": status,
"size_mb": size_mb,
"path": meta_file,
}
)
return indexes
def remove_index(self, index_name: str, force: bool = False): def remove_index(self, index_name: str, force: bool = False):
"""Safely remove an index - always show all matches for transparency""" """Safely remove an index - always show all matches for transparency"""
@@ -544,79 +524,13 @@ Examples:
if not project_path.exists(): if not project_path.exists():
continue continue
# 1) CLI-format index under .leann/indexes/<name>
index_dir = project_path / ".leann" / "indexes" / index_name index_dir = project_path / ".leann" / "indexes" / index_name
if index_dir.exists(): if index_dir.exists():
is_current = project_path == current_path is_current = project_path == current_path
matches.append( matches.append(
{ {"project_path": project_path, "index_dir": index_dir, "is_current": is_current}
"project_path": project_path,
"index_dir": index_dir,
"is_current": is_current,
"kind": "cli",
}
) )
# 2) App-format indexes
# We support two ways of addressing apps:
# a) by the file base (e.g., `pdf_documents`)
# b) by the parent directory name (e.g., `new_txt`)
seen_app_meta = set()
# 2a) by file base
for meta_file in project_path.rglob(f"{index_name}.leann.meta.json"):
if meta_file.is_file():
# Skip CLI-built indexes' meta under .leann/indexes
try:
cli_indexes_dir = project_path / ".leann" / "indexes"
if cli_indexes_dir.exists() and cli_indexes_dir in meta_file.parents:
continue
except Exception:
pass
is_current = project_path == current_path
key = (str(project_path), str(meta_file))
if key in seen_app_meta:
continue
seen_app_meta.add(key)
matches.append(
{
"project_path": project_path,
"files_dir": meta_file.parent,
"meta_file": meta_file,
"is_current": is_current,
"kind": "app",
"display_name": meta_file.parent.name,
"file_base": meta_file.name.replace(".leann.meta.json", ""),
}
)
# 2b) by parent directory name
for meta_file in project_path.rglob("*.leann.meta.json"):
if meta_file.is_file() and meta_file.parent.name == index_name:
# Skip CLI-built indexes' meta under .leann/indexes
try:
cli_indexes_dir = project_path / ".leann" / "indexes"
if cli_indexes_dir.exists() and cli_indexes_dir in meta_file.parents:
continue
except Exception:
pass
is_current = project_path == current_path
key = (str(project_path), str(meta_file))
if key in seen_app_meta:
continue
seen_app_meta.add(key)
matches.append(
{
"project_path": project_path,
"files_dir": meta_file.parent,
"meta_file": meta_file,
"is_current": is_current,
"kind": "app",
"display_name": meta_file.parent.name,
"file_base": meta_file.name.replace(".leann.meta.json", ""),
}
)
# Sort: current project first, then by project name # Sort: current project first, then by project name
matches.sort(key=lambda x: (not x["is_current"], x["project_path"].name)) matches.sort(key=lambda x: (not x["is_current"], x["project_path"].name))
return matches return matches
@@ -624,8 +538,8 @@ Examples:
def _remove_single_match(self, match, index_name: str, force: bool): def _remove_single_match(self, match, index_name: str, force: bool):
"""Handle removal when only one match is found""" """Handle removal when only one match is found"""
project_path = match["project_path"] project_path = match["project_path"]
index_dir = match["index_dir"]
is_current = match["is_current"] is_current = match["is_current"]
kind = match.get("kind", "cli")
if is_current: if is_current:
location_info = "current project" location_info = "current project"
@@ -636,10 +550,7 @@ Examples:
print(f"✅ Found 1 index named '{index_name}':") print(f"✅ Found 1 index named '{index_name}':")
print(f" {emoji} Location: {location_info}") print(f" {emoji} Location: {location_info}")
if kind == "cli": print(f" 📍 Path: {project_path}")
print(f" 📍 Path: {project_path / '.leann' / 'indexes' / index_name}")
else:
print(f" 📍 Meta: {match['meta_file']}")
if not force: if not force:
if not is_current: if not is_current:
@@ -651,22 +562,9 @@ Examples:
print(" ❌ Removal cancelled.") print(" ❌ Removal cancelled.")
return False return False
if kind == "cli": return self._delete_index_directory(
return self._delete_index_directory( index_dir, index_name, project_path if not is_current else None
match["index_dir"], )
index_name,
project_path if not is_current else None,
is_app=False,
)
else:
return self._delete_index_directory(
match["files_dir"],
match.get("display_name", index_name),
project_path if not is_current else None,
is_app=True,
meta_file=match.get("meta_file"),
app_file_base=match.get("file_base"),
)
def _remove_from_multiple_matches(self, matches, index_name: str, force: bool): def _remove_from_multiple_matches(self, matches, index_name: str, force: bool):
"""Handle removal when multiple matches are found""" """Handle removal when multiple matches are found"""
@@ -677,34 +575,19 @@ Examples:
for i, match in enumerate(matches, 1): for i, match in enumerate(matches, 1):
project_path = match["project_path"] project_path = match["project_path"]
is_current = match["is_current"] is_current = match["is_current"]
kind = match.get("kind", "cli")
if is_current: if is_current:
print(f" {i}. 🏠 Current project ({'CLI' if kind == 'cli' else 'APP'})") print(f" {i}. 🏠 Current project")
print(f" 📍 {project_path}")
else: else:
print(f" {i}. 📂 {project_path.name} ({'CLI' if kind == 'cli' else 'APP'})") print(f" {i}. 📂 {project_path.name}")
print(f" 📍 {project_path}")
# Show path details
if kind == "cli":
print(f" 📍 {project_path / '.leann' / 'indexes' / index_name}")
else:
print(f" 📍 {match['meta_file']}")
# Show size info # Show size info
try: try:
if kind == "cli": size_mb = sum(
size_mb = sum( f.stat().st_size for f in match["index_dir"].iterdir() if f.is_file()
f.stat().st_size for f in match["index_dir"].iterdir() if f.is_file() ) / (1024 * 1024)
) / (1024 * 1024)
else:
file_base = match.get("file_base")
size_mb = 0.0
if file_base:
size_mb = sum(
f.stat().st_size
for f in match["files_dir"].glob(f"{file_base}.leann*")
if f.is_file()
) / (1024 * 1024)
print(f" 📦 Size: {size_mb:.1f} MB") print(f" 📦 Size: {size_mb:.1f} MB")
except (OSError, PermissionError): except (OSError, PermissionError):
pass pass
@@ -728,8 +611,8 @@ Examples:
if 0 <= choice_idx < len(matches): if 0 <= choice_idx < len(matches):
selected_match = matches[choice_idx] selected_match = matches[choice_idx]
project_path = selected_match["project_path"] project_path = selected_match["project_path"]
index_dir = selected_match["index_dir"]
is_current = selected_match["is_current"] is_current = selected_match["is_current"]
kind = selected_match.get("kind", "cli")
location = "current project" if is_current else f"'{project_path.name}' project" location = "current project" if is_current else f"'{project_path.name}' project"
print(f" 🎯 Selected: Remove from {location}") print(f" 🎯 Selected: Remove from {location}")
@@ -742,22 +625,9 @@ Examples:
print(" ❌ Confirmation failed. Removal cancelled.") print(" ❌ Confirmation failed. Removal cancelled.")
return False return False
if kind == "cli": return self._delete_index_directory(
return self._delete_index_directory( index_dir, index_name, project_path if not is_current else None
selected_match["index_dir"], )
index_name,
project_path if not is_current else None,
is_app=False,
)
else:
return self._delete_index_directory(
selected_match["files_dir"],
selected_match.get("display_name", index_name),
project_path if not is_current else None,
is_app=True,
meta_file=selected_match.get("meta_file"),
app_file_base=selected_match.get("file_base"),
)
else: else:
print(" ❌ Invalid choice. Removal cancelled.") print(" ❌ Invalid choice. Removal cancelled.")
return False return False
@@ -767,65 +637,21 @@ Examples:
return False return False
def _delete_index_directory( def _delete_index_directory(
self, self, index_dir: Path, index_name: str, project_path: Path | None = None
index_dir: Path,
index_display_name: str,
project_path: Optional[Path] = None,
is_app: bool = False,
meta_file: Optional[Path] = None,
app_file_base: Optional[str] = None,
): ):
"""Delete a CLI index directory or APP index files safely.""" """Actually delete the index directory"""
try: try:
if is_app: import shutil
removed = 0
errors = 0
# Delete only files that belong to this app index (based on file base)
pattern_base = app_file_base or ""
for f in index_dir.glob(f"{pattern_base}.leann*"):
try:
f.unlink()
removed += 1
except Exception:
errors += 1
# Best-effort: also remove the meta file if specified and still exists
if meta_file and meta_file.exists():
try:
meta_file.unlink()
removed += 1
except Exception:
errors += 1
if removed > 0 and errors == 0: shutil.rmtree(index_dir)
if project_path:
print( if project_path:
f"App index '{index_display_name}' removed from {project_path.name}" print(f"Index '{index_name}' removed from {project_path.name}")
)
else:
print(f"✅ App index '{index_display_name}' removed successfully")
return True
elif removed > 0 and errors > 0:
print(
f"⚠️ App index '{index_display_name}' partially removed (some files couldn't be deleted)"
)
return True
else:
print(
f"❌ No files found to remove for app index '{index_display_name}' in {index_dir}"
)
return False
else: else:
import shutil print(f"✅ Index '{index_name}' removed successfully")
return True
shutil.rmtree(index_dir)
if project_path:
print(f"✅ Index '{index_display_name}' removed from {project_path.name}")
else:
print(f"✅ Index '{index_display_name}' removed successfully")
return True
except Exception as e: except Exception as e:
print(f"❌ Error removing index '{index_display_name}': {e}") print(f"❌ Error removing index '{index_name}': {e}")
return False return False
def load_documents( def load_documents(
@@ -1249,101 +1075,13 @@ Examples:
async def search_documents(self, args): async def search_documents(self, args):
index_name = args.index_name index_name = args.index_name
query = args.query query = args.query
# First try to find the index in current project
index_path = self.get_index_path(index_name) index_path = self.get_index_path(index_name)
if self.index_exists(index_name):
# Found in current project, use it
pass
else:
# Search across all registered projects (like list_indexes does)
all_matches = self._find_all_matching_indexes(index_name)
if not all_matches:
print(
f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
)
return
elif len(all_matches) == 1:
# Found exactly one match, use it
match = all_matches[0]
if match["kind"] == "cli":
index_path = str(match["index_dir"] / "documents.leann")
else:
# App format: use the meta file to construct the path
meta_file = match["meta_file"]
file_base = match["file_base"]
index_path = str(meta_file.parent / f"{file_base}.leann")
project_info = ( if not self.index_exists(index_name):
"current project" print(
if match["is_current"] f"Index '{index_name}' not found. Use 'leann build {index_name} --docs <dir> [<dir2> ...]' to create it."
else f"project '{match['project_path'].name}'" )
) return
print(f"Using index '{index_name}' from {project_info}")
else:
# Multiple matches found
if args.non_interactive:
# Non-interactive mode: automatically select the best match
# Priority: current project first, then first available
current_matches = [m for m in all_matches if m["is_current"]]
if current_matches:
match = current_matches[0]
location_desc = "current project"
else:
match = all_matches[0]
location_desc = f"project '{match['project_path'].name}'"
if match["kind"] == "cli":
index_path = str(match["index_dir"] / "documents.leann")
else:
meta_file = match["meta_file"]
file_base = match["file_base"]
index_path = str(meta_file.parent / f"{file_base}.leann")
print(
f"Found {len(all_matches)} indexes named '{index_name}', using index from {location_desc}"
)
else:
# Interactive mode: ask user to choose
print(f"Found {len(all_matches)} indexes named '{index_name}':")
for i, match in enumerate(all_matches, 1):
project_path = match["project_path"]
is_current = match["is_current"]
kind = match.get("kind", "cli")
if is_current:
print(
f" {i}. 🏠 Current project ({'CLI' if kind == 'cli' else 'APP'})"
)
else:
print(
f" {i}. 📂 {project_path.name} ({'CLI' if kind == 'cli' else 'APP'})"
)
try:
choice = input(f"Which index to search? (1-{len(all_matches)}): ").strip()
choice_idx = int(choice) - 1
if 0 <= choice_idx < len(all_matches):
match = all_matches[choice_idx]
if match["kind"] == "cli":
index_path = str(match["index_dir"] / "documents.leann")
else:
meta_file = match["meta_file"]
file_base = match["file_base"]
index_path = str(meta_file.parent / f"{file_base}.leann")
project_info = (
"current project"
if match["is_current"]
else f"project '{match['project_path'].name}'"
)
print(f"Using index '{index_name}' from {project_info}")
else:
print("Invalid choice. Aborting search.")
return
except (ValueError, KeyboardInterrupt):
print("Invalid input. Aborting search.")
return
searcher = LeannSearcher(index_path=index_path) searcher = LeannSearcher(index_path=index_path)
results = searcher.search( results = searcher.search(

View File

@@ -94,7 +94,7 @@ def handle_request(request):
}, },
} }
# Build simplified command with non-interactive flag for MCP compatibility # Build simplified command
cmd = [ cmd = [
"leann", "leann",
"search", "search",
@@ -102,7 +102,6 @@ def handle_request(request):
args["query"], args["query"],
f"--top-k={args.get('top_k', 5)}", f"--top-k={args.get('top_k', 5)}",
f"--complexity={args.get('complexity', 32)}", f"--complexity={args.get('complexity', 32)}",
"--non-interactive",
] ]
result = subprocess.run(cmd, capture_output=True, text=True) result = subprocess.run(cmd, capture_output=True, text=True)

View File

@@ -2,10 +2,8 @@
import importlib import importlib
import importlib.metadata import importlib.metadata
import json
import logging import logging
from pathlib import Path from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Optional, Union
if TYPE_CHECKING: if TYPE_CHECKING:
from leann.interface import LeannBackendFactoryInterface from leann.interface import LeannBackendFactoryInterface
@@ -45,54 +43,3 @@ def autodiscover_backends():
# print(f"WARN: Could not import backend module '{backend_module_name}': {e}") # print(f"WARN: Could not import backend module '{backend_module_name}': {e}")
pass pass
# print("INFO: Backend auto-discovery finished.") # print("INFO: Backend auto-discovery finished.")
def register_project_directory(project_dir: Optional[Union[str, Path]] = None):
"""
Register a project directory in the global LEANN registry.
This allows `leann list` to discover indexes created by apps or other tools.
Args:
project_dir: Directory to register. If None, uses current working directory.
"""
if project_dir is None:
project_dir = Path.cwd()
else:
project_dir = Path(project_dir)
# Only register directories that have some kind of LEANN content
# Either .leann/indexes/ (CLI format) or *.leann.meta.json files (apps format)
has_cli_indexes = (project_dir / ".leann" / "indexes").exists()
has_app_indexes = any(project_dir.rglob("*.leann.meta.json"))
if not (has_cli_indexes or has_app_indexes):
# Don't register if there are no LEANN indexes
return
global_registry = Path.home() / ".leann" / "projects.json"
global_registry.parent.mkdir(exist_ok=True)
project_str = str(project_dir.resolve())
# Load existing registry
projects = []
if global_registry.exists():
try:
with open(global_registry) as f:
projects = json.load(f)
except Exception:
logger.debug("Could not load existing project registry")
projects = []
# Add project if not already present
if project_str not in projects:
projects.append(project_str)
# Save updated registry
try:
with open(global_registry, "w") as f:
json.dump(projects, f, indent=2)
logger.debug(f"Registered project directory: {project_str}")
except Exception as e:
logger.warning(f"Could not save project registry: {e}")

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "leann" name = "leann"
version = "0.3.1" version = "0.3.0"
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!" description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
readme = "README.md" readme = "README.md"
requires-python = ">=3.9" requires-python = ">=3.9"

View File

@@ -14,6 +14,8 @@ dependencies = [
"numpy>=1.26.0", "numpy>=1.26.0",
"torch", "torch",
"tqdm", "tqdm",
"flask",
"flask_compress",
"datasets>=2.15.0", "datasets>=2.15.0",
"evaluate", "evaluate",
"colorama", "colorama",
@@ -64,7 +66,9 @@ test = [
"pytest>=7.0", "pytest>=7.0",
"pytest-timeout>=2.0", "pytest-timeout>=2.0",
"llama-index-core>=0.12.0", "llama-index-core>=0.12.0",
"llama-index-readers-file>=0.4.0",
"python-dotenv>=1.0.0", "python-dotenv>=1.0.0",
"sentence-transformers>=2.2.0",
] ]
diskann = [ diskann = [
@@ -96,8 +100,13 @@ leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }
[tool.ruff] [tool.ruff]
target-version = "py39" target-version = "py39"
line-length = 100 line-length = 100
extend-exclude = ["third_party"] extend-exclude = [
"third_party",
"*.egg-info",
"__pycache__",
".git",
".venv",
]
[tool.ruff.lint] [tool.ruff.lint]
select = [ select = [
@@ -120,12 +129,21 @@ ignore = [
"RUF012", # mutable class attributes should be annotated with typing.ClassVar "RUF012", # mutable class attributes should be annotated with typing.ClassVar
] ]
[tool.ruff.lint.per-file-ignores]
"test/**/*.py" = ["E402"] # module level import not at top of file (common in tests)
"examples/**/*.py" = ["E402"] # module level import not at top of file (common in examples)
[tool.ruff.format] [tool.ruff.format]
quote-style = "double" quote-style = "double"
indent-style = "space" indent-style = "space"
skip-magic-trailing-comma = false skip-magic-trailing-comma = false
line-ending = "auto" line-ending = "auto"
[dependency-groups]
dev = [
"ruff>=0.12.4",
]
[tool.lychee] [tool.lychee]
accept = ["200", "403", "429", "503"] accept = ["200", "403", "429", "503"]
timeout = 20 timeout = 20

7313
uv.lock generated
View File

File diff suppressed because it is too large Load Diff