# LEANN/pyproject.toml

# Build backend and the packages that must be installed before building.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=61.0",
    "cmake>=3.24",
]

[project]
name = "leann-workspace"
version = "0.1.0"
requires-python = ">=3.9"
dependencies = [
    # Workspace packages; [tool.uv.sources] maps these to local paths.
    "leann-core",
    "leann-backend-hnsw",

    # General runtime dependencies.
    "numpy>=1.26.0",
    "torch",
    "tqdm",
    "flask",
    "flask_compress",
    "datasets>=2.15.0",
    "evaluate",
    "colorama",
    "boto3",
    "protobuf==4.25.3",  # exact pin
    "sglang",
    "ollama",
    "requests>=2.25.0",
    "sentence-transformers>=2.2.0",
    "openai>=1.0.0",

    # PDF parsing, required for document processing.
    "PyPDF2>=3.0.0",
    "pdfplumber>=0.11.0",
    "pymupdf>=1.26.0",
    "pypdfium2>=4.30.0",

    # LlamaIndex core and readers.
    "llama-index>=0.12.44",
    "llama-index-readers-file>=0.4.0",  # needed for PDF parsing
    # Disabled: the docling integrations require Python >= 3.10, which is
    # above this project's requires-python floor.
    # "llama-index-readers-docling",
    # "llama-index-node-parser-docling",
    "llama-index-vector-stores-faiss>=0.4.0",
    "llama-index-embeddings-huggingface>=0.5.5",

    # Miscellaneous.
    "ipykernel==6.29.5",  # exact pin
    "msgpack>=1.1.1",
    # MLX packages are only installed on macOS (sys_platform == 'darwin').
    "mlx>=0.26.3; sys_platform == 'darwin'",
    "mlx-lm>=0.26.0; sys_platform == 'darwin'",
    "psutil>=5.8.0",
]

# Optional extras, installed with e.g. `pip install ".[test]"`.
[project.optional-dependencies]
# Development tooling: test runners, formatters/linters, and helpers.
dev = [
    "pytest>=7.0",
    "pytest-cov>=4.0",
    "pytest-xdist>=3.0",  # For parallel test execution
    "black>=23.0",
    "ruff>=0.1.0",
    "matplotlib",
    "huggingface-hub>=0.20.0",
    "pre-commit>=3.5.0",
]

# Everything needed to run the test suite as configured in
# [tool.pytest.ini_options].
test = [
    "pytest>=7.0",
    # Provides the `timeout` ini option used in [tool.pytest.ini_options].
    "pytest-timeout>=2.0",
    # Provides the `env` ini option used in [tool.pytest.ini_options]; without
    # this plugin pytest treats `env` as an unknown key and the variables are
    # never set.
    "pytest-env>=1.0.0",
    "llama-index-core>=0.12.0",
    "llama-index-readers-file>=0.4.0",
    "python-dotenv>=1.0.0",
    "sentence-transformers>=2.2.0",
]

# Optional DiskANN backend (resolved locally via [tool.uv.sources]).
diskann = [
    "leann-backend-diskann",
]

# Extra parsers for non-PDF document formats.
documents = [
    "beautifulsoup4>=4.13.0",  # For HTML parsing
    "python-docx>=0.8.11",     # For Word documents
    "openpyxl>=3.1.0",         # For Excel files
    "pandas>=2.2.0",           # For data processing
]

[tool.setuptools]
# Explicitly empty: no top-level Python modules are declared for this
# distribution. NOTE(review): presumably set so setuptools does not run
# automatic module discovery in the workspace root — confirm.
py-modules = []


# Tell uv to resolve the workspace packages from local directories under
# packages/, installed in editable mode, rather than from a package index.
[tool.uv.sources]
leann-core = { path = "packages/leann-core", editable = true }
leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = true }
leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }

[tool.ruff]
# Must match the minimum interpreter in [project].requires-python (>=3.9).
# A newer target lets the pyupgrade ("UP") rules rewrite code into syntax
# that Python 3.9 cannot run.
target-version = "py39"
line-length = 100
# Paths skipped by both the linter and the formatter, in addition to ruff's
# built-in excludes.
extend-exclude = [
    "third_party",
    "*.egg-info",
    "__pycache__",
    ".git",
    ".venv",
]

[tool.ruff.lint]
# Rule families that are switched on.
select = [
    "E",    # pycodestyle (errors)
    "W",    # pycodestyle (warnings)
    "F",    # pyflakes
    "I",    # isort import ordering
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "UP",   # pyupgrade
    "N",    # pep8-naming
    "RUF",  # ruff's own rules
]
# Individual rules switched off within the families above.
ignore = [
    "E501",    # line too long; left to the formatter
    "B008",    # function call in argument default
    "B904",    # raise inside except without `from`
    "N812",    # lowercase name imported as non-lowercase
    "N806",    # non-lowercase variable inside a function
    "RUF012",  # mutable class attribute lacks a typing.ClassVar annotation
]

[tool.ruff.lint.per-file-ignores]
# E402 (module-level import not at top of file) is common in tests and
# examples, so it is allowed there.
# pytest's testpaths points at "tests", which the "test/" glob does not match;
# both spellings are listed so the existing pattern keeps working.
"test/**/*.py" = ["E402"]
"tests/**/*.py" = ["E402"]
"examples/**/*.py" = ["E402"]

# Formatter settings for `ruff format`.
[tool.ruff.format]
indent-style = "space"
line-ending = "auto"
quote-style = "double"
# false: an existing trailing comma keeps a collection split one item per line.
skip-magic-trailing-comma = false

# Dependency groups are separate from the `dev` extra declared under
# [project.optional-dependencies]; note the ruff lower bound here is higher
# than the one in that extra.
[dependency-groups]
dev = [
    "ruff>=0.12.4",
]

# Settings for the lychee link checker.
# NOTE(review): confirm lychee is pointed at this file; by default it looks
# for a standalone lychee.toml.
[tool.lychee]
# Presumably the HTTP status codes treated as a successful check — verify.
accept = ["200", "403", "429", "503"]
timeout = 20  # presumably seconds — confirm against lychee docs
max_retries = 2
# Hosts and paths left out of link checking.
exclude = ["localhost", "127.0.0.1", "example.com"]
exclude_path = [".git/", ".venv/", "__pycache__/", "third_party/"]
scheme = ["https", "http"]

[tool.pytest.ini_options]
# Test discovery: where to look and which names count as tests.
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]

# Default command-line flags. --strict-markers rejects any marker that is not
# registered in `markers` below.
addopts = ["-v", "--tb=short", "--strict-markers", "--disable-warnings"]
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "openai: marks tests that require OpenAI API key",
]

# Per-test timeout in seconds; this option is read by the pytest-timeout plugin.
timeout = 600

# Environment variables set for the test run; this option is read by the
# pytest-env plugin and has no effect if that plugin is not installed.
env = ["HF_HUB_DISABLE_SYMLINKS=1", "TOKENIZERS_PARALLELISM=false"]