Compare commits
11 Commits
v0.2.2
...
feature/cl
| Author | SHA1 | Date | |
|---|---|---|---|
| | b55eeeae5f | | |
| | e890b2311f | | |
| | f3d99fd118 | | |
| | 8eee90bf80 | | |
| | 649d4ad03e | | |
| | d9b6f195c5 | | |
| | 00f506c0bd | | |
| | e872dd1d23 | | |
| | 063c687ff7 | | |
| | bb8ecd54d7 | | |
| | 716217ae24 | | |
@@ -4,8 +4,8 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-diskann"
|
name = "leann-backend-diskann"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
dependencies = ["leann-core==0.2.2", "numpy", "protobuf>=3.19.0"]
|
dependencies = ["leann-core==0.2.1", "numpy", "protobuf>=3.19.0"]
|
||||||
|
|
||||||
[tool.scikit-build]
|
[tool.scikit-build]
|
||||||
# Key: simplified CMake path
|
# Key: simplified CMake path
|
||||||
|
|||||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: b2dc4ea2c7...67a2611ad1
@@ -6,10 +6,10 @@ build-backend = "scikit_build_core.build"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-backend-hnsw"
|
name = "leann-backend-hnsw"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
description = "Custom-built HNSW (Faiss) backend for the Leann toolkit."
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"leann-core==0.2.2",
|
"leann-core==0.2.1",
|
||||||
"numpy",
|
"numpy",
|
||||||
"pyzmq>=23.0.0",
|
"pyzmq>=23.0.0",
|
||||||
"msgpack>=1.0.0",
|
"msgpack>=1.0.0",
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann-core"
|
name = "leann-core"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
description = "Core API and plugin system for LEANN"
|
description = "Core API and plugin system for LEANN"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
|
|||||||
@@ -74,11 +74,10 @@ class LeannCLI:
|
|||||||
formatter_class=argparse.RawDescriptionHelpFormatter,
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
||||||
epilog="""
|
epilog="""
|
||||||
Examples:
|
Examples:
|
||||||
leann build my-docs --docs ./documents # Build index named my-docs
|
leann build my-docs --docs ./documents # Build index named my-docs
|
||||||
leann build my-ppts --docs ./ --file-types .pptx,.pdf # Index only PowerPoint and PDF files
|
leann search my-docs "query" # Search in my-docs index
|
||||||
leann search my-docs "query" # Search in my-docs index
|
leann ask my-docs "question" # Ask my-docs index
|
||||||
leann ask my-docs "question" # Ask my-docs index
|
leann list # List all stored indexes
|
||||||
leann list # List all stored indexes
|
|
||||||
""",
|
""",
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -100,11 +99,6 @@ Examples:
|
|||||||
build_parser.add_argument("--num-threads", type=int, default=1)
|
build_parser.add_argument("--num-threads", type=int, default=1)
|
||||||
build_parser.add_argument("--compact", action="store_true", default=True)
|
build_parser.add_argument("--compact", action="store_true", default=True)
|
||||||
build_parser.add_argument("--recompute", action="store_true", default=True)
|
build_parser.add_argument("--recompute", action="store_true", default=True)
|
||||||
build_parser.add_argument(
|
|
||||||
"--file-types",
|
|
||||||
type=str,
|
|
||||||
help="Comma-separated list of file extensions to include (e.g., '.txt,.pdf,.pptx'). If not specified, uses default supported types.",
|
|
||||||
)
|
|
||||||
|
|
||||||
# Search command
|
# Search command
|
||||||
search_parser = subparsers.add_parser("search", help="Search documents")
|
search_parser = subparsers.add_parser("search", help="Search documents")
|
||||||
@@ -114,12 +108,7 @@ Examples:
|
|||||||
search_parser.add_argument("--complexity", type=int, default=64)
|
search_parser.add_argument("--complexity", type=int, default=64)
|
||||||
search_parser.add_argument("--beam-width", type=int, default=1)
|
search_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
search_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
search_parser.add_argument(
|
search_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
"--recompute-embeddings",
|
|
||||||
action="store_true",
|
|
||||||
default=True,
|
|
||||||
help="Recompute embeddings (default: True)",
|
|
||||||
)
|
|
||||||
search_parser.add_argument(
|
search_parser.add_argument(
|
||||||
"--pruning-strategy",
|
"--pruning-strategy",
|
||||||
choices=["global", "local", "proportional"],
|
choices=["global", "local", "proportional"],
|
||||||
@@ -142,12 +131,7 @@ Examples:
|
|||||||
ask_parser.add_argument("--complexity", type=int, default=32)
|
ask_parser.add_argument("--complexity", type=int, default=32)
|
||||||
ask_parser.add_argument("--beam-width", type=int, default=1)
|
ask_parser.add_argument("--beam-width", type=int, default=1)
|
||||||
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
ask_parser.add_argument("--prune-ratio", type=float, default=0.0)
|
||||||
ask_parser.add_argument(
|
ask_parser.add_argument("--recompute-embeddings", action="store_true")
|
||||||
"--recompute-embeddings",
|
|
||||||
action="store_true",
|
|
||||||
default=True,
|
|
||||||
help="Recompute embeddings (default: True)",
|
|
||||||
)
|
|
||||||
ask_parser.add_argument(
|
ask_parser.add_argument(
|
||||||
"--pruning-strategy",
|
"--pruning-strategy",
|
||||||
choices=["global", "local", "proportional"],
|
choices=["global", "local", "proportional"],
|
||||||
@@ -270,10 +254,8 @@ Examples:
|
|||||||
print(f' leann search {example_name} "your query"')
|
print(f' leann search {example_name} "your query"')
|
||||||
print(f" leann ask {example_name} --interactive")
|
print(f" leann ask {example_name} --interactive")
|
||||||
|
|
||||||
def load_documents(self, docs_dir: str, custom_file_types: str | None = None):
|
def load_documents(self, docs_dir: str):
|
||||||
print(f"Loading documents from {docs_dir}...")
|
print(f"Loading documents from {docs_dir}...")
|
||||||
if custom_file_types:
|
|
||||||
print(f"Using custom file types: {custom_file_types}")
|
|
||||||
|
|
||||||
# Try to use better PDF parsers first
|
# Try to use better PDF parsers first
|
||||||
documents = []
|
documents = []
|
||||||
@@ -305,81 +287,66 @@ Examples:
|
|||||||
documents.extend(default_docs)
|
documents.extend(default_docs)
|
||||||
|
|
||||||
# Load other file types with default reader
|
# Load other file types with default reader
|
||||||
if custom_file_types:
|
code_extensions = [
|
||||||
# Parse custom file types from comma-separated string
|
# Original document types
|
||||||
code_extensions = [ext.strip() for ext in custom_file_types.split(",") if ext.strip()]
|
".txt",
|
||||||
# Ensure extensions start with a dot
|
".md",
|
||||||
code_extensions = [ext if ext.startswith(".") else f".{ext}" for ext in code_extensions]
|
".docx",
|
||||||
else:
|
# Code files for Claude Code integration
|
||||||
# Use default supported file types
|
".py",
|
||||||
code_extensions = [
|
".js",
|
||||||
# Original document types
|
".ts",
|
||||||
".txt",
|
".jsx",
|
||||||
".md",
|
".tsx",
|
||||||
".docx",
|
".java",
|
||||||
".pptx",
|
".cpp",
|
||||||
# Code files for Claude Code integration
|
".c",
|
||||||
".py",
|
".h",
|
||||||
".js",
|
".hpp",
|
||||||
".ts",
|
".cs",
|
||||||
".jsx",
|
".go",
|
||||||
".tsx",
|
".rs",
|
||||||
".java",
|
".rb",
|
||||||
".cpp",
|
".php",
|
||||||
".c",
|
".swift",
|
||||||
".h",
|
".kt",
|
||||||
".hpp",
|
".scala",
|
||||||
".cs",
|
".r",
|
||||||
".go",
|
".sql",
|
||||||
".rs",
|
".sh",
|
||||||
".rb",
|
".bash",
|
||||||
".php",
|
".zsh",
|
||||||
".swift",
|
".fish",
|
||||||
".kt",
|
".ps1",
|
||||||
".scala",
|
".bat",
|
||||||
".r",
|
# Config and markup files
|
||||||
".sql",
|
".json",
|
||||||
".sh",
|
".yaml",
|
||||||
".bash",
|
".yml",
|
||||||
".zsh",
|
".xml",
|
||||||
".fish",
|
".toml",
|
||||||
".ps1",
|
".ini",
|
||||||
".bat",
|
".cfg",
|
||||||
# Config and markup files
|
".conf",
|
||||||
".json",
|
".html",
|
||||||
".yaml",
|
".css",
|
||||||
".yml",
|
".scss",
|
||||||
".xml",
|
".less",
|
||||||
".toml",
|
".vue",
|
||||||
".ini",
|
".svelte",
|
||||||
".cfg",
|
# Data science
|
||||||
".conf",
|
".ipynb",
|
||||||
".html",
|
".R",
|
||||||
".css",
|
".py",
|
||||||
".scss",
|
".jl",
|
||||||
".less",
|
]
|
||||||
".vue",
|
other_docs = SimpleDirectoryReader(
|
||||||
".svelte",
|
docs_dir,
|
||||||
# Data science
|
recursive=True,
|
||||||
".ipynb",
|
encoding="utf-8",
|
||||||
".R",
|
required_exts=code_extensions,
|
||||||
".py",
|
).load_data(show_progress=True)
|
||||||
".jl",
|
documents.extend(other_docs)
|
||||||
]
|
|
||||||
# Try to load other file types, but don't fail if none are found
|
|
||||||
try:
|
|
||||||
other_docs = SimpleDirectoryReader(
|
|
||||||
docs_dir,
|
|
||||||
recursive=True,
|
|
||||||
encoding="utf-8",
|
|
||||||
required_exts=code_extensions,
|
|
||||||
).load_data(show_progress=True)
|
|
||||||
documents.extend(other_docs)
|
|
||||||
except ValueError as e:
|
|
||||||
if "No files found" in str(e):
|
|
||||||
print("No additional files found for other supported types.")
|
|
||||||
else:
|
|
||||||
raise e
|
|
||||||
|
|
||||||
all_texts = []
|
all_texts = []
|
||||||
|
|
||||||
@@ -457,7 +424,7 @@ Examples:
|
|||||||
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
|
print(f"Index '{index_name}' already exists. Use --force to rebuild.")
|
||||||
return
|
return
|
||||||
|
|
||||||
all_texts = self.load_documents(docs_dir, args.file_types)
|
all_texts = self.load_documents(docs_dir)
|
||||||
if not all_texts:
|
if not all_texts:
|
||||||
print("No documents found")
|
print("No documents found")
|
||||||
return
|
return
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "leann"
|
name = "leann"
|
||||||
version = "0.2.2"
|
version = "0.2.1"
|
||||||
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.9"
|
requires-python = ">=3.9"
|
||||||
|
|||||||
Reference in New Issue
Block a user