Compare commits
11 Commits
v0.2.2
...
feature/cl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b55eeeae5f | ||
|
|
e890b2311f | ||
|
|
f3d99fd118 | ||
|
|
8eee90bf80 | ||
|
|
649d4ad03e | ||
|
|
d9b6f195c5 | ||
|
|
00f506c0bd | ||
|
|
e872dd1d23 | ||
|
|
063c687ff7 | ||
|
|
bb8ecd54d7 | ||
|
|
716217ae24 |
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-diskann"
|
name = "leann-backend-diskann"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
dependencies = ["leann-core==0.2.2", "numpy", "protobuf>=3.19.0"]
|
dependencies = ["leann-core==0.2.1", "numpy", "protobuf>=3.19.0"]
|
||||||
|
|
||||||
[tool.scikit-build]
|
[tool.scikit-build]
|
||||||
# Key: simplified CMake path
|
# Key: simplified CMake path
|
||||||
|
|||||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: b2dc4ea2c7...67a2611ad1
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-hnsw"
|
name = "leann-backend-hnsw"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"leann-core==0.2.2",
|
"leann-core==0.2.1",
|
||||||
"numpy",
|
"numpy",
|
||||||
"pyzmq>=23.0.0",
|
"pyzmq>=23.0.0",
|
||||||
"msgpack>=1.0.0",
|
"msgpack>=1.0.0",
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-core"
|
name = "leann-core"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
description = "Core API and plugin system for LEANN"
|
description = "Core API and plugin system for LEANN"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
|
|||||||
@@ -75,7 +75,6 @@ class LeannCLI:
|
|||||||
epilog="""
|
epilog="""
|
||||||
Examples:
|
Examples:
|
||||||
leann build my-docs --docs ./documents # Build index named my-docs
|
leann build my-docs --docs ./documents # Build index named my-docs
|
||||||
leann build my-ppts --docs ./ --file-types .pptx,.pdf # Index only PowerPoint and PDF files
|
|
||||||
leann search my-docs "query" # Search in my-docs index
|
leann search my-docs "query" # Search in my-docs index
|
||||||
leann ask my-docs "question" # Ask my-docs index
|
leann ask my-docs "question" # Ask my-docs index
|
||||||
leann list # List all stored indexes
|
leann list # List all stored indexes
|
||||||
@@ -100,11 +99,6 @@ Examples:
|
|||||||
build_parser.add_argument("--num-threads", type=int, default=1)
|
build_parser.add_argument("--num-threads", type=int, default=1)
|
||||||
build_parser.add_argument("--compact", action="store_true", default=True)
|
build_parser.add_argument("--compact", action="store_true", default=True)
|
||||||
build_parser.add_argument("--recompute", action="store_true", default=True)
|
build_parser.add_argument("--recompute", action="store_true", default=True)
|
||||||
build_parser.add_argument(
|
|
||||||
"--file-types",
|
|
||||||
type=str,
|
|
||||||
help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Search command
|
# Search command
|
||||||
search_parser = subparsers.add_parser("search", help="Search documents")
|
search_parser = subparsers.add_parser("search", help="Search documents")
|
||||||
@@ -114,12 +108,7 @@ Examples:
|
|||||||
search_parser.add_argument("--complexity", type=int, default=64)
|
search_parser.add_argument("--complexity", type=int, default=64)
|
||||||
search_parser.add_argument("--beam-width", type=int, default=1)
|
search_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
search_parser.add_argument(
|
search_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
"--recompute-embeddings",
|
|
||||||
action="store_true",
|
|
||||||
default=True,
|
|
||||||
help="Recompute embeddings (default: True)",
|
|
||||||
)
|
|
||||||
search_parser.add_argument(
|
search_parser.add_argument(
|
||||||
"--pruning-strategy",
|
"--pruning-strategy",
|
||||||
choices=["global", "local", "proportional"],
|
choices=["global", "local", "proportional"],
|
||||||
@@ -142,12 +131,7 @@ Examples:
|
|||||||
ask_parser.add_argument("--complexity", type=int, default=32)
|
ask_parser.add_argument("--complexity", type=int, default=32)
|
||||||
ask_parser.add_argument("--beam-width", type=int, default=1)
|
ask_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
ask_parser.add_argument(
|
ask_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
"--recompute-embeddings",
|
|
||||||
action="store_true",
|
|
||||||
default=True,
|
|
||||||
help="Recompute embeddings (default: True)",
|
|
||||||
)
|
|
||||||
ask_parser.add_argument(
|
ask_parser.add_argument(
|
||||||
"--pruning-strategy",
|
"--pruning-strategy",
|
||||||
choices=["global", "local", "proportional"],
|
choices=["global", "local", "proportional"],
|
||||||
@@ -270,10 +254,8 @@ Examples:
|
|||||||
print(f' leann search {example_name} "your query"')
|
print(f' leann search {example_name} "your query"')
|
||||||
print(f" leann ask {example_name} --interactive")
|
print(f" leann ask {example_name} --interactive")
|
||||||
|
|
||||||
def load_documents(self, docs_dir: str, custom_file_types: str | None = None):
|
def load_documents(self, docs_dir: str):
|
||||||
print(f"Loading documents from {docs_dir}...")
|
print(f"Loading documents from {docs_dir}...")
|
||||||
if custom_file_types:
|
|
||||||
print(f"Using custom file types: {custom_file_types}")
|
|
||||||
|
|
||||||
# Try to use better PDF parsers first
|
# Try to use better PDF parsers first
|
||||||
documents = []
|
documents = []
|
||||||
@@ -305,19 +287,11 @@ Examples:
|
|||||||
documents.extend(default_docs)
|
documents.extend(default_docs)
|
||||||
|
|
||||||
# Load other file types with default reader
|
# Load other file types with default reader
|
||||||
if custom_file_types:
|
|
||||||
# Parse custom file types from comma-separated string
|
|
||||||
code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
|
|
||||||
# Ensure extensions start with a dot
|
|
||||||
code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
|
|
||||||
else:
|
|
||||||
# Use default supported file types
|
|
||||||
code_extensions = [
|
code_extensions = [
|
||||||
# Original document types
|
# Original document types
|
||||||
".txt",
|
".txt",
|
||||||
".md",
|
".md",
|
||||||
".docx",
|
".docx",
|
||||||
".pptx",
|
|
||||||
# Code files for Claude Code integration
|
# Code files for Claude Code integration
|
||||||
".py",
|
".py",
|
||||||
".js",
|
".js",
|
||||||
@@ -366,8 +340,6 @@ Examples:
|
|||||||
".py",
|
".py",
|
||||||
".jl",
|
".jl",
|
||||||
]
|
]
|
||||||
# Try to load other file types, but don't fail if none are found
|
|
||||||
try:
|
|
||||||
other_docs = SimpleDirectoryReader(
|
other_docs = SimpleDirectoryReader(
|
||||||
docs_dir,
|
docs_dir,
|
||||||
recursive=True,
|
recursive=True,
|
||||||
@@ -375,11 +347,6 @@ Examples:
|
|||||||
required_exts=code_extensions,
|
required_exts=code_extensions,
|
||||||
).load_data(show_progress=True)
|
).load_data(show_progress=True)
|
||||||
documents.extend(other_docs)
|
documents.extend(other_docs)
|
||||||
except ValueError as e:
|
|
||||||
if "No files found" in str(e):
|
|
||||||
print("No additional files found for other supported types.")
|
|
||||||
else:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
all_texts = []
|
all_texts = []
|
||||||
|
|
||||||
@@ -457,7 +424,7 @@ Examples:
|
|||||||
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
|
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
|
||||||
return
|
return
|
||||||
|
|
||||||
all_texts = self.load_documents(docs_dir, args.file_types)
|
all_texts = self.load_documents(docs_dir)
|
||||||
if not all_texts:
|
if not all_texts:
|
||||||
print("No documents found")
|
print("No documents found")
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann"
|
name = "leann"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
|
|||||||
Reference in New Issue
Block a user