Compare commits
1 Commit
v0.3.1
...
fix-arch-c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6b465d5a0f |
@@ -183,9 +183,6 @@ class Benchmark:
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
self.model(input_ids=input_ids, attention_mask=attention_mask)
|
self.model(input_ids=input_ids, attention_mask=attention_mask)
|
||||||
# mps sync
|
|
||||||
if torch.backends.mps.is_available():
|
|
||||||
torch.mps.synchronize()
|
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
|
|
||||||
return end_time - start_time
|
return end_time - start_time
|
||||||
|
|||||||
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-diskann"
|
name = "leann-backend-diskann"
|
||||||
version = "0.3.1"
|
version = "0.3.0"
|
||||||
dependencies = ["leann-core==0.3.1", "numpy", "protobuf>=3.19.0"]
|
dependencies = ["leann-core==0.3.0", "numpy", "protobuf>=3.19.0"]
|
||||||
|
|
||||||
[tool.scikit-build]
|
[tool.scikit-build]
|
||||||
# Key: simplified CMake path
|
# Key: simplified CMake path
|
||||||
|
|||||||
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-hnsw"
|
name = "leann-backend-hnsw"
|
||||||
version = "0.3.1"
|
version = "0.3.0"
|
||||||
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"leann-core==0.3.1",
|
"leann-core==0.3.0",
|
||||||
"numpy",
|
"numpy",
|
||||||
"pyzmq>=23.0.0",
|
"pyzmq>=23.0.0",
|
||||||
"msgpack>=1.0.0",
|
"msgpack>=1.0.0",
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-core"
|
name = "leann-core"
|
||||||
version = "0.3.1"
|
version = "0.3.0"
|
||||||
description = "Core API and plugin system for LEANN"
|
description = "Core API and plugin system for LEANN"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
|
|||||||
@@ -405,9 +405,13 @@ Examples:
|
|||||||
print("💡 Get started:")
|
print("💡 Get started:")
|
||||||
print(" leann build my-docs --docs ./documents")
|
print(" leann build my-docs --docs ./documents")
|
||||||
else:
|
else:
|
||||||
# Count only projects that have at least one discoverable index
|
projects_count = len(
|
||||||
projects_count = sum(
|
[
|
||||||
1 for p in valid_projects if len(self._discover_indexes_in_project(p)) > 0
|
p
|
||||||
|
for p in valid_projects
|
||||||
|
if (p / ".leann" / "indexes").exists()
|
||||||
|
and list((p / ".leann" / "indexes").iterdir())
|
||||||
|
]
|
||||||
)
|
)
|
||||||
print(f"📊 Total: {total_indexes} indexes across {projects_count} projects")
|
print(f"📊 Total: {total_indexes} indexes across {projects_count} projects")
|
||||||
|
|
||||||
@@ -457,35 +461,26 @@ Examples:
|
|||||||
)
|
)
|
||||||
|
|
||||||
# 2. Apps format: *.leann.meta.json files anywhere in the project
|
# 2. Apps format: *.leann.meta.json files anywhere in the project
|
||||||
cli_indexes_dir = project_path / ".leann" / "indexes"
|
|
||||||
for meta_file in project_path.rglob("*.leann.meta.json"):
|
for meta_file in project_path.rglob("*.leann.meta.json"):
|
||||||
if meta_file.is_file():
|
if meta_file.is_file():
|
||||||
# Skip CLI-built indexes (which store meta under .leann/indexes/<name>/)
|
# Extract index name from filename (remove .leann.meta.json extension)
|
||||||
try:
|
index_name = meta_file.name.replace(".leann.meta.json", "")
|
||||||
if cli_indexes_dir.exists() and cli_indexes_dir in meta_file.parents:
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
# Use the parent directory name as the app index display name
|
|
||||||
display_name = meta_file.parent.name
|
|
||||||
# Extract file base used to store files
|
|
||||||
file_base = meta_file.name.replace(".leann.meta.json", "")
|
|
||||||
|
|
||||||
# Apps indexes are considered complete if the .leann.meta.json file exists
|
# Apps indexes are considered complete if the .leann.meta.json file exists
|
||||||
status = "✅"
|
status = "✅"
|
||||||
|
|
||||||
# Calculate total size of all related files (use file base)
|
# Calculate total size of all related files
|
||||||
size_mb = 0
|
size_mb = 0
|
||||||
try:
|
try:
|
||||||
index_dir = meta_file.parent
|
index_dir = meta_file.parent
|
||||||
for related_file in index_dir.glob(f"{file_base}.leann*"):
|
for related_file in index_dir.glob(f"{index_name}.leann*"):
|
||||||
size_mb += related_file.stat().st_size / (1024 * 1024)
|
size_mb += related_file.stat().st_size / (1024 * 1024)
|
||||||
except (OSError, PermissionError):
|
except (OSError, PermissionError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
indexes.append(
|
indexes.append(
|
||||||
{
|
{
|
||||||
"name": display_name,
|
"name": index_name,
|
||||||
"type": "app",
|
"type": "app",
|
||||||
"status": status,
|
"status": status,
|
||||||
"size_mb": size_mb,
|
"size_mb": size_mb,
|
||||||
@@ -539,79 +534,13 @@ Examples:
|
|||||||
if not project_path.exists():
|
if not project_path.exists():
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# 1) CLI-format index under .leann/indexes/<name>
|
|
||||||
index_dir = project_path / ".leann" / "indexes" / index_name
|
index_dir = project_path / ".leann" / "indexes" / index_name
|
||||||
if index_dir.exists():
|
if index_dir.exists():
|
||||||
is_current = project_path == current_path
|
is_current = project_path == current_path
|
||||||
matches.append(
|
matches.append(
|
||||||
{
|
{"project_path": project_path, "index_dir": index_dir, "is_current": is_current}
|
||||||
"project_path": project_path,
|
|
||||||
"index_dir": index_dir,
|
|
||||||
"is_current": is_current,
|
|
||||||
"kind": "cli",
|
|
||||||
}
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# 2) App-format indexes
|
|
||||||
# We support two ways of addressing apps:
|
|
||||||
# a) by the file base (e.g., `pdf_documents`)
|
|
||||||
# b) by the parent directory name (e.g., `new_txt`)
|
|
||||||
seen_app_meta = set()
|
|
||||||
|
|
||||||
# 2a) by file base
|
|
||||||
for meta_file in project_path.rglob(f"{index_name}.leann.meta.json"):
|
|
||||||
if meta_file.is_file():
|
|
||||||
# Skip CLI-built indexes' meta under .leann/indexes
|
|
||||||
try:
|
|
||||||
cli_indexes_dir = project_path / ".leann" / "indexes"
|
|
||||||
if cli_indexes_dir.exists() and cli_indexes_dir in meta_file.parents:
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
is_current = project_path == current_path
|
|
||||||
key = (str(project_path), str(meta_file))
|
|
||||||
if key in seen_app_meta:
|
|
||||||
continue
|
|
||||||
seen_app_meta.add(key)
|
|
||||||
matches.append(
|
|
||||||
{
|
|
||||||
"project_path": project_path,
|
|
||||||
"files_dir": meta_file.parent,
|
|
||||||
"meta_file": meta_file,
|
|
||||||
"is_current": is_current,
|
|
||||||
"kind": "app",
|
|
||||||
"display_name": meta_file.parent.name,
|
|
||||||
"file_base": meta_file.name.replace(".leann.meta.json", ""),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# 2b) by parent directory name
|
|
||||||
for meta_file in project_path.rglob("*.leann.meta.json"):
|
|
||||||
if meta_file.is_file() and meta_file.parent.name == index_name:
|
|
||||||
# Skip CLI-built indexes' meta under .leann/indexes
|
|
||||||
try:
|
|
||||||
cli_indexes_dir = project_path / ".leann" / "indexes"
|
|
||||||
if cli_indexes_dir.exists() and cli_indexes_dir in meta_file.parents:
|
|
||||||
continue
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
is_current = project_path == current_path
|
|
||||||
key = (str(project_path), str(meta_file))
|
|
||||||
if key in seen_app_meta:
|
|
||||||
continue
|
|
||||||
seen_app_meta.add(key)
|
|
||||||
matches.append(
|
|
||||||
{
|
|
||||||
"project_path": project_path,
|
|
||||||
"files_dir": meta_file.parent,
|
|
||||||
"meta_file": meta_file,
|
|
||||||
"is_current": is_current,
|
|
||||||
"kind": "app",
|
|
||||||
"display_name": meta_file.parent.name,
|
|
||||||
"file_base": meta_file.name.replace(".leann.meta.json", ""),
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Sort: current project first, then by project name
|
# Sort: current project first, then by project name
|
||||||
matches.sort(key=lambda x: (not x["is_current"], x["project_path"].name))
|
matches.sort(key=lambda x: (not x["is_current"], x["project_path"].name))
|
||||||
return matches
|
return matches
|
||||||
@@ -619,8 +548,8 @@ Examples:
|
|||||||
def _remove_single_match(self, match, index_name: str, force: bool):
|
def _remove_single_match(self, match, index_name: str, force: bool):
|
||||||
"""Handle removal when only one match is found"""
|
"""Handle removal when only one match is found"""
|
||||||
project_path = match["project_path"]
|
project_path = match["project_path"]
|
||||||
|
index_dir = match["index_dir"]
|
||||||
is_current = match["is_current"]
|
is_current = match["is_current"]
|
||||||
kind = match.get("kind", "cli")
|
|
||||||
|
|
||||||
if is_current:
|
if is_current:
|
||||||
location_info = "current project"
|
location_info = "current project"
|
||||||
@@ -631,10 +560,7 @@ Examples:
|
|||||||
|
|
||||||
print(f"✅ Found 1 index named '{index_name}':")
|
print(f"✅ Found 1 index named '{index_name}':")
|
||||||
print(f" {emoji} Location: {location_info}")
|
print(f" {emoji} Location: {location_info}")
|
||||||
if kind == "cli":
|
print(f" 📍 Path: {project_path}")
|
||||||
print(f" 📍 Path: {project_path / '.leann' / 'indexes' / index_name}")
|
|
||||||
else:
|
|
||||||
print(f" 📍 Meta: {match['meta_file']}")
|
|
||||||
|
|
||||||
if not force:
|
if not force:
|
||||||
if not is_current:
|
if not is_current:
|
||||||
@@ -646,22 +572,9 @@ Examples:
|
|||||||
print(" ❌ Removal cancelled.")
|
print(" ❌ Removal cancelled.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if kind == "cli":
|
return self._delete_index_directory(
|
||||||
return self._delete_index_directory(
|
index_dir, index_name, project_path if not is_current else None
|
||||||
match["index_dir"],
|
)
|
||||||
index_name,
|
|
||||||
project_path if not is_current else None,
|
|
||||||
is_app=False,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return self._delete_index_directory(
|
|
||||||
match["files_dir"],
|
|
||||||
match.get("display_name", index_name),
|
|
||||||
project_path if not is_current else None,
|
|
||||||
is_app=True,
|
|
||||||
meta_file=match.get("meta_file"),
|
|
||||||
app_file_base=match.get("file_base"),
|
|
||||||
)
|
|
||||||
|
|
||||||
def _remove_from_multiple_matches(self, matches, index_name: str, force: bool):
|
def _remove_from_multiple_matches(self, matches, index_name: str, force: bool):
|
||||||
"""Handle removal when multiple matches are found"""
|
"""Handle removal when multiple matches are found"""
|
||||||
@@ -672,34 +585,19 @@ Examples:
|
|||||||
for i, match in enumerate(matches, 1):
|
for i, match in enumerate(matches, 1):
|
||||||
project_path = match["project_path"]
|
project_path = match["project_path"]
|
||||||
is_current = match["is_current"]
|
is_current = match["is_current"]
|
||||||
kind = match.get("kind", "cli")
|
|
||||||
|
|
||||||
if is_current:
|
if is_current:
|
||||||
print(f" {i}. 🏠 Current project ({'CLI' if kind == 'cli' else 'APP'})")
|
print(f" {i}. 🏠 Current project")
|
||||||
|
print(f" 📍 {project_path}")
|
||||||
else:
|
else:
|
||||||
print(f" {i}. 📂 {project_path.name} ({'CLI' if kind == 'cli' else 'APP'})")
|
print(f" {i}. 📂 {project_path.name}")
|
||||||
|
print(f" 📍 {project_path}")
|
||||||
# Show path details
|
|
||||||
if kind == "cli":
|
|
||||||
print(f" 📍 {project_path / '.leann' / 'indexes' / index_name}")
|
|
||||||
else:
|
|
||||||
print(f" 📍 {match['meta_file']}")
|
|
||||||
|
|
||||||
# Show size info
|
# Show size info
|
||||||
try:
|
try:
|
||||||
if kind == "cli":
|
size_mb = sum(
|
||||||
size_mb = sum(
|
f.stat().st_size for f in match["index_dir"].iterdir() if f.is_file()
|
||||||
f.stat().st_size for f in match["index_dir"].iterdir() if f.is_file()
|
) / (1024 * 1024)
|
||||||
) / (1024 * 1024)
|
|
||||||
else:
|
|
||||||
file_base = match.get("file_base")
|
|
||||||
size_mb = 0.0
|
|
||||||
if file_base:
|
|
||||||
size_mb = sum(
|
|
||||||
f.stat().st_size
|
|
||||||
for f in match["files_dir"].glob(f"{file_base}.leann*")
|
|
||||||
if f.is_file()
|
|
||||||
) / (1024 * 1024)
|
|
||||||
print(f" 📦 Size: {size_mb:.1f} MB")
|
print(f" 📦 Size: {size_mb:.1f} MB")
|
||||||
except (OSError, PermissionError):
|
except (OSError, PermissionError):
|
||||||
pass
|
pass
|
||||||
@@ -723,8 +621,8 @@ Examples:
|
|||||||
if 0 <= choice_idx < len(matches):
|
if 0 <= choice_idx < len(matches):
|
||||||
selected_match = matches[choice_idx]
|
selected_match = matches[choice_idx]
|
||||||
project_path = selected_match["project_path"]
|
project_path = selected_match["project_path"]
|
||||||
|
index_dir = selected_match["index_dir"]
|
||||||
is_current = selected_match["is_current"]
|
is_current = selected_match["is_current"]
|
||||||
kind = selected_match.get("kind", "cli")
|
|
||||||
|
|
||||||
location = "current project" if is_current else f"'{project_path.name}' project"
|
location = "current project" if is_current else f"'{project_path.name}' project"
|
||||||
print(f" 🎯 Selected: Remove from {location}")
|
print(f" 🎯 Selected: Remove from {location}")
|
||||||
@@ -737,22 +635,9 @@ Examples:
|
|||||||
print(" ❌ Confirmation failed. Removal cancelled.")
|
print(" ❌ Confirmation failed. Removal cancelled.")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if kind == "cli":
|
return self._delete_index_directory(
|
||||||
return self._delete_index_directory(
|
index_dir, index_name, project_path if not is_current else None
|
||||||
selected_match["index_dir"],
|
)
|
||||||
index_name,
|
|
||||||
project_path if not is_current else None,
|
|
||||||
is_app=False,
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
return self._delete_index_directory(
|
|
||||||
selected_match["files_dir"],
|
|
||||||
selected_match.get("display_name", index_name),
|
|
||||||
project_path if not is_current else None,
|
|
||||||
is_app=True,
|
|
||||||
meta_file=selected_match.get("meta_file"),
|
|
||||||
app_file_base=selected_match.get("file_base"),
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
print(" ❌ Invalid choice. Removal cancelled.")
|
print(" ❌ Invalid choice. Removal cancelled.")
|
||||||
return False
|
return False
|
||||||
@@ -762,65 +647,21 @@ Examples:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def _delete_index_directory(
|
def _delete_index_directory(
|
||||||
self,
|
self, index_dir: Path, index_name: str, project_path: Optional[Path] = None
|
||||||
index_dir: Path,
|
|
||||||
index_display_name: str,
|
|
||||||
project_path: Optional[Path] = None,
|
|
||||||
is_app: bool = False,
|
|
||||||
meta_file: Optional[Path] = None,
|
|
||||||
app_file_base: Optional[str] = None,
|
|
||||||
):
|
):
|
||||||
"""Delete a CLI index directory or APP index files safely."""
|
"""Actually delete the index directory"""
|
||||||
try:
|
try:
|
||||||
if is_app:
|
import shutil
|
||||||
removed = 0
|
|
||||||
errors = 0
|
|
||||||
# Delete only files that belong to this app index (based on file base)
|
|
||||||
pattern_base = app_file_base or ""
|
|
||||||
for f in index_dir.glob(f"{pattern_base}.leann*"):
|
|
||||||
try:
|
|
||||||
f.unlink()
|
|
||||||
removed += 1
|
|
||||||
except Exception:
|
|
||||||
errors += 1
|
|
||||||
# Best-effort: also remove the meta file if specified and still exists
|
|
||||||
if meta_file and meta_file.exists():
|
|
||||||
try:
|
|
||||||
meta_file.unlink()
|
|
||||||
removed += 1
|
|
||||||
except Exception:
|
|
||||||
errors += 1
|
|
||||||
|
|
||||||
if removed > 0 and errors == 0:
|
shutil.rmtree(index_dir)
|
||||||
if project_path:
|
|
||||||
print(
|
if project_path:
|
||||||
f"✅ App index '{index_display_name}' removed from {project_path.name}"
|
print(f"✅ Index '{index_name}' removed from {project_path.name}")
|
||||||
)
|
|
||||||
else:
|
|
||||||
print(f"✅ App index '{index_display_name}' removed successfully")
|
|
||||||
return True
|
|
||||||
elif removed > 0 and errors > 0:
|
|
||||||
print(
|
|
||||||
f"⚠️ App index '{index_display_name}' partially removed (some files couldn't be deleted)"
|
|
||||||
)
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
print(
|
|
||||||
f"❌ No files found to remove for app index '{index_display_name}' in {index_dir}"
|
|
||||||
)
|
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
import shutil
|
print(f"✅ Index '{index_name}' removed successfully")
|
||||||
|
return True
|
||||||
shutil.rmtree(index_dir)
|
|
||||||
|
|
||||||
if project_path:
|
|
||||||
print(f"✅ Index '{index_display_name}' removed from {project_path.name}")
|
|
||||||
else:
|
|
||||||
print(f"✅ Index '{index_display_name}' removed successfully")
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ Error removing index '{index_display_name}': {e}")
|
print(f"❌ Error removing index '{index_name}': {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def load_documents(
|
def load_documents(
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann"
|
name = "leann"
|
||||||
version = "0.3.1"
|
version = "0.3.0"
|
||||||
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
|
|||||||
Reference in New Issue
Block a user