Files
LEANN/pyproject.toml
yichuan520030910320 2d58650b04 finish ast fork
2025-09-08 14:34:41 -07:00

165 lines
4.4 KiB
TOML

# Build backend plus the tools that must be installed before a build can start.
[build-system]
build-backend = "setuptools.build_meta"
requires = [
    "setuptools>=61.0",
    "cmake>=3.24",
]
# Project metadata and runtime requirements for the workspace root package.
[project]
name = "leann-workspace"
version = "0.1.0"
requires-python = ">=3.9"
dependencies = [
    # Workspace packages (mapped to local paths in [tool.uv.sources])
    "leann-core",
    "leann-backend-hnsw",
    # Third-party runtime libraries
    "typer>=0.12.3",
    "numpy>=1.26.0",
    "torch",
    "tqdm",
    "datasets>=2.15.0",
    "evaluate",
    "colorama",
    "boto3",
    "protobuf==4.25.3",
    "sglang",
    "ollama",
    "requests>=2.25.0",
    "sentence-transformers>=2.2.0",
    "openai>=1.0.0",
    # PDF parsing - required for document processing
    "PyPDF2>=3.0.0",
    "pdfplumber>=0.11.0",
    "pymupdf>=1.26.0",
    "pypdfium2>=4.30.0",
    # LlamaIndex core and readers
    "llama-index>=0.12.44",
    "llama-index-readers-file>=0.4.0",  # needed for PDF parsing
    # Disabled: both docling packages require Python >= 3.10, while this
    # project still declares requires-python ">=3.9".
    # "llama-index-readers-docling",
    # "llama-index-node-parser-docling",
    "llama-index-vector-stores-faiss>=0.4.0",
    "llama-index-embeddings-huggingface>=0.5.5",
    # Miscellaneous
    "ipykernel==6.29.5",
    "msgpack>=1.1.1",
    # The environment markers restrict the two MLX packages to macOS on arm64.
    "mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'",
    "psutil>=5.8.0",
    "pybind11>=3.0.0",
    "pathspec>=0.12.1",
    "nbconvert>=7.16.6",
    "gitignore-parser>=0.1.12",
    # AST-aware code chunking (astchunk is also a local path source)
    "astchunk>=0.1.0",
    "tree-sitter>=0.20.0",
    "tree-sitter-python>=0.20.0",
    "tree-sitter-java>=0.20.0",
    "tree-sitter-c-sharp>=0.20.0",
    "tree-sitter-typescript>=0.20.0",
]
# Optional extras; each key below is an installable extra name.
[project.optional-dependencies]
# Developer tooling: test runner, coverage, formatters/linters, git hooks.
dev = [
    "pytest>=7.0",
    "pytest-cov>=4.0",
    "pytest-xdist>=3.0",  # parallel test execution
    "black>=23.0",
    "ruff==0.12.7",  # pinned so formatting is identical in every environment
    "matplotlib",
    "huggingface-hub>=0.20.0",
    "pre-commit>=3.5.0",
]
# Requirements for running the test suite.
test = [
    "pytest>=7.0",
    "pytest-timeout>=2.0",  # provides the `timeout` option set in [tool.pytest.ini_options]
    # Provides the `env` option set in [tool.pytest.ini_options]. Without this
    # plugin pytest treats `env` as an unknown key and never exports the
    # listed variables (and --disable-warnings hides the resulting warning).
    "pytest-env>=1.0.0",
    "llama-index-core>=0.12.0",
    "python-dotenv>=1.0.0",
]
# DiskANN backend (local workspace package, see [tool.uv.sources]).
diskann = [
    "leann-backend-diskann",
]
# Extra document formats.
documents = [
    "beautifulsoup4>=4.13.0",  # HTML parsing
    "python-docx>=0.8.11",  # Word documents
    "openpyxl>=3.1.0",  # Excel files
    "pandas>=2.2.0",  # data processing
]
# Command-line entry point: `wechat-exporter` runs wechat_exporter.main:main.
[project.scripts]
wechat-exporter = "wechat_exporter.main:main"

# setuptools packaging: the only package shipped is wechat_exporter, whose
# sources live under packages/wechat-exporter; no top-level modules are listed.
[tool.setuptools]
packages = ["wechat_exporter"]
package-dir = { wechat_exporter = "packages/wechat-exporter" }
py-modules = []
# Local workspace packages: uv resolves these names from in-repo paths as
# editable installs.
[tool.uv.sources]
astchunk = { path = "packages/astchunk-leann", editable = true }
leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = true }
leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }
leann-core = { path = "packages/leann-core", editable = true }
# Ruff settings shared by the linter and the formatter.
[tool.ruff]
line-length = 100
target-version = "py39"  # matches requires-python ">=3.9"
extend-exclude = ["third_party"]  # skip the third_party directory
# Lint rule selection: enable the families in `select`, then switch off the
# individual rules in `ignore`.
[tool.ruff.lint]
select = [
    "E",    # pycodestyle (errors)
    "W",    # pycodestyle (warnings)
    "F",    # pyflakes
    "I",    # isort
    "B",    # flake8-bugbear
    "C4",   # flake8-comprehensions
    "UP",   # pyupgrade
    "N",    # pep8-naming
    "RUF",  # ruff-specific rules
]
ignore = [
    "E501",    # line too long; left to the formatter
    "B008",    # function calls in argument defaults
    "B904",    # raise without `from`
    "N812",    # lowercase name imported as non-lowercase
    "N806",    # in-function variable should be lowercase
    "RUF012",  # mutable class attributes should be annotated with typing.ClassVar
]
# Formatter output style.
[tool.ruff.format]
indent-style = "space"
quote-style = "double"
line-ending = "auto"
skip-magic-trailing-comma = false
# Settings for the lychee link checker.
[tool.lychee]
scheme = ["https", "http"]
# Status codes accepted as a successful check.
accept = ["200", "403", "429", "503"]
timeout = 20  # presumably seconds - confirm against lychee docs
max_retries = 2
# Hosts and paths left out of checking.
exclude = ["localhost", "127.0.0.1", "example.com"]
exclude_path = [".git/", ".venv/", "__pycache__/", "third_party/"]
# pytest configuration: test discovery, registered markers, default CLI flags.
[tool.pytest.ini_options]
# Discovery: look in tests/ for test_*.py files, Test* classes, test_* functions.
testpaths = ["tests"]
python_files = ["test_*.py"]
python_classes = ["Test*"]
python_functions = ["test_*"]
# Every marker must be registered here because --strict-markers is enabled below.
markers = [
    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
    "openai: marks tests that require OpenAI API key",
]
addopts = [
    "-v",
    "--tb=short",
    "--strict-markers",
    "--disable-warnings",
]
# NOTE(review): `timeout` and `env` are plugin-provided options
# (pytest-timeout / pytest-env); confirm both plugins are installed wherever
# the tests run, otherwise these settings have no effect.
timeout = 300  # 5 min; lowered from 600s (10 min) for CI safety
env = [
    "HF_HUB_DISABLE_SYMLINKS=1",
    "TOKENIZERS_PARALLELISM=false",
]