chore: use http url of astchunk; use group for some dev deps

This commit is contained in:
Andy Lee
2025-09-15 21:21:09 -07:00
parent 07e4f176e1
commit ac560964f5
9 changed files with 74 additions and 108 deletions

View File

@@ -26,21 +26,14 @@ jobs:
- name: Install uv
uses: astral-sh/setup-uv@v4
- name: Install pre-commit
- name: Sync lint tools via uv groups (tools only)
run: |
uv pip install --system pre-commit
uv sync --only-group lint
- name: Run pre-commit hooks (all files)
run: |
pre-commit run --all-files --show-diff-on-failure
uv run pre-commit run --all-files --show-diff-on-failure
- name: Run ruff check
run: |
ruff check .
- name: Run ruff format check
run: |
ruff format --check .
build:
needs: lint
@@ -316,15 +309,15 @@ jobs:
uv venv --python ${{ matrix.python }}
source .venv/bin/activate || source .venv/Scripts/activate
# Install packages using --find-links to prioritize local builds
# Install test tools only first, without including the project itself
uv sync --only-group test
# Now install the built wheels (these are the artifacts under test)
uv pip install --find-links packages/leann-core/dist --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist packages/leann-core/dist/*.whl || uv pip install --find-links packages/leann-core/dist packages/leann-core/dist/*.tar.gz
uv pip install --find-links packages/leann-core/dist packages/leann-backend-hnsw/dist/*.whl
uv pip install --find-links packages/leann-core/dist packages/leann-backend-diskann/dist/*.whl
uv pip install packages/leann/dist/*.whl || uv pip install packages/leann/dist/*.tar.gz
# Install test dependencies using extras
uv pip install -e ".[test]"
- name: Run tests with pytest
env:
CI: true

2
.gitmodules vendored
View File

@@ -16,5 +16,5 @@
url = https://github.com/zeromq/libzmq.git
[submodule "packages/astchunk-leann"]
path = packages/astchunk-leann
url = git@github.com:yichuan-w/astchunk-leann.git
url = https://github.com/yichuan-w/astchunk-leann.git
branch = main

View File

@@ -706,9 +706,8 @@ results = searcher.search("bananacrocodile", use_grep=True, top_k=1)
## Reproduce Our Results
```bash
uv pip install -e ".[dev]" # Install dev dependencies
python benchmarks/run_evaluation.py # Will auto-download evaluation data and run benchmarks
python benchmarks/run_evaluation.py benchmarks/data/indices/rpj_wiki/rpj_wiki --num-queries 2000 # After downloading data, you can run the benchmark with our biggest index
uv run benchmarks/run_evaluation.py # Will auto-download evaluation data and run benchmarks
uv run benchmarks/run_evaluation.py benchmarks/data/indices/rpj_wiki/rpj_wiki --num-queries 2000 # After downloading data, you can run the benchmark with our biggest index
```
The evaluation script downloads data automatically on first run. The last three results were tested with partial personal data, and you can reproduce them with your own data!

View File

@@ -53,7 +53,7 @@ def download_data_if_needed(data_root: Path, download_embeddings: bool = False):
print(
"Error: huggingface_hub is not installed. Please install it to download the data:"
)
print("uv pip install -e '.[dev]'")
print("uv sync --only-group dev")
sys.exit(1)
except Exception as e:
print(f"An error occurred during data download: {e}")

View File

@@ -53,9 +53,9 @@ We use pre-commit hooks to ensure code quality and consistency. This runs automa
### Setup Pre-commit
1. **Install pre-commit** (already included when you run `uv sync`):
1. **Install pre-commit tools**:
```bash
uv pip install pre-commit
uv sync lint
```
2. **Install the git hooks**:
@@ -65,7 +65,7 @@ We use pre-commit hooks to ensure code quality and consistency. This runs automa
3. **Run pre-commit manually** (optional):
```bash
pre-commit run --all-files
uv run pre-commit run --all-files
```
### Pre-commit Checks
@@ -85,6 +85,9 @@ Our pre-commit configuration includes:
### Running Tests
```bash
# Install test tools only (no project runtime)
uv sync --group test
# Run all tests
uv run pytest

View File

@@ -57,24 +57,6 @@ dependencies = [
]
[project.optional-dependencies]
dev = [
"pytest>=7.0",
"pytest-cov>=4.0",
"pytest-xdist>=3.0", # For parallel test execution
"black>=23.0",
"ruff==0.12.7", # Fixed version to ensure consistent formatting across all environments
"matplotlib",
"huggingface-hub>=0.20.0",
"pre-commit>=3.5.0",
]
test = [
"pytest>=7.0",
"pytest-timeout>=2.0",
"llama-index-core>=0.12.0",
"python-dotenv>=1.0.0",
]
diskann = [
"leann-backend-diskann",
]
@@ -102,6 +84,28 @@ leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = tr
leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true }
astchunk = { path = "packages/astchunk-leann", editable = true }
[dependency-groups]
# Minimal lint toolchain for CI and local hooks
lint = [
"pre-commit>=3.5.0",
"ruff==0.12.7", # Fixed version to ensure consistent formatting across all environments
]
# Test toolchain (no heavy project runtime deps)
test = [
"pytest>=7.0",
"pytest-cov>=4.0",
"pytest-xdist>=3.0",
"pytest-timeout>=2.0",
"python-dotenv>=1.0.0",
]
# dependencies by apps/ should list here
dev = [
"matplotlib",
"huggingface-hub>=0.20.0",
]
[tool.ruff]
target-version = "py39"
line-length = 100

View File

@@ -40,8 +40,8 @@ Tests DiskANN graph partitioning functionality:
### Install test dependencies:
```bash
# Using extras
uv pip install -e ".[test]"
# Using uv dependency groups (tools only)
uv sync --only-group test
```
### Run all tests:

97
uv.lock generated
View File

@@ -1,5 +1,5 @@
version = 1
revision = 2
revision = 3
requires-python = ">=3.9"
resolution-markers = [
"python_full_version >= '3.12'",
@@ -297,45 +297,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" },
]
[[package]]
name = "black"
version = "25.1.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "click", version = "8.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
{ name = "mypy-extensions" },
{ name = "packaging" },
{ name = "pathspec" },
{ name = "platformdirs" },
{ name = "tomli", marker = "python_full_version < '3.11'" },
{ name = "typing-extensions", marker = "python_full_version < '3.11'" },
]
sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4d/3b/4ba3f93ac8d90410423fdd31d7541ada9bcee1df32fb90d26de41ed40e1d/black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32", size = 1629419, upload-time = "2025-01-29T05:37:06.642Z" },
{ url = "https://files.pythonhosted.org/packages/b4/02/0bde0485146a8a5e694daed47561785e8b77a0466ccc1f3e485d5ef2925e/black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da", size = 1461080, upload-time = "2025-01-29T05:37:09.321Z" },
{ url = "https://files.pythonhosted.org/packages/52/0e/abdf75183c830eaca7589144ff96d49bce73d7ec6ad12ef62185cc0f79a2/black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7", size = 1766886, upload-time = "2025-01-29T04:18:24.432Z" },
{ url = "https://files.pythonhosted.org/packages/dc/a6/97d8bb65b1d8a41f8a6736222ba0a334db7b7b77b8023ab4568288f23973/black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9", size = 1419404, upload-time = "2025-01-29T04:19:04.296Z" },
{ url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" },
{ url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, upload-time = "2025-01-29T05:37:14.309Z" },
{ url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" },
{ url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" },
{ url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" },
{ url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" },
{ url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" },
{ url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" },
{ url = "https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" },
{ url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" },
{ url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" },
{ url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" },
{ url = "https://files.pythonhosted.org/packages/d3/b6/ae7507470a4830dbbfe875c701e84a4a5fb9183d1497834871a715716a92/black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0", size = 1628593, upload-time = "2025-01-29T05:37:23.672Z" },
{ url = "https://files.pythonhosted.org/packages/24/c1/ae36fa59a59f9363017ed397750a0cd79a470490860bc7713967d89cdd31/black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f", size = 1460000, upload-time = "2025-01-29T05:37:25.829Z" },
{ url = "https://files.pythonhosted.org/packages/ac/b6/98f832e7a6c49aa3a464760c67c7856363aa644f2f3c74cf7d624168607e/black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e", size = 1765963, upload-time = "2025-01-29T04:18:38.116Z" },
{ url = "https://files.pythonhosted.org/packages/ce/e9/2cb0a017eb7024f70e0d2e9bdb8c5a5b078c5740c7f8816065d06f04c557/black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355", size = 1419419, upload-time = "2025-01-29T04:18:30.191Z" },
{ url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" },
]
[[package]]
name = "bleach"
version = "6.2.0"
@@ -2290,17 +2251,6 @@ dependencies = [
]
[package.optional-dependencies]
dev = [
{ name = "black" },
{ name = "huggingface-hub" },
{ name = "matplotlib", version = "3.9.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "matplotlib", version = "3.10.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
{ name = "pre-commit" },
{ name = "pytest" },
{ name = "pytest-cov" },
{ name = "pytest-xdist" },
{ name = "ruff" },
]
diskann = [
{ name = "leann-backend-diskann" },
]
@@ -2310,10 +2260,22 @@ documents = [
{ name = "pandas" },
{ name = "python-docx" },
]
[package.dev-dependencies]
dev = [
{ name = "huggingface-hub" },
{ name = "matplotlib", version = "3.9.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" },
{ name = "matplotlib", version = "3.10.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" },
]
lint = [
{ name = "pre-commit" },
{ name = "ruff" },
]
test = [
{ name = "llama-index-core" },
{ name = "pytest" },
{ name = "pytest-cov" },
{ name = "pytest-timeout" },
{ name = "pytest-xdist" },
{ name = "python-dotenv" },
]
@@ -2321,23 +2283,19 @@ test = [
requires-dist = [
{ name = "astchunk", editable = "packages/astchunk-leann" },
{ name = "beautifulsoup4", marker = "extra == 'documents'", specifier = ">=4.13.0" },
{ name = "black", marker = "extra == 'dev'", specifier = ">=23.0" },
{ name = "boto3" },
{ name = "colorama" },
{ name = "datasets", specifier = ">=2.15.0" },
{ name = "evaluate" },
{ name = "gitignore-parser", specifier = ">=0.1.12" },
{ name = "huggingface-hub", marker = "extra == 'dev'", specifier = ">=0.20.0" },
{ name = "ipykernel", specifier = "==6.29.5" },
{ name = "leann-backend-diskann", marker = "extra == 'diskann'", editable = "packages/leann-backend-diskann" },
{ name = "leann-backend-hnsw", editable = "packages/leann-backend-hnsw" },
{ name = "leann-core", editable = "packages/leann-core" },
{ name = "llama-index", specifier = ">=0.12.44" },
{ name = "llama-index-core", marker = "extra == 'test'", specifier = ">=0.12.0" },
{ name = "llama-index-embeddings-huggingface", specifier = ">=0.5.5" },
{ name = "llama-index-readers-file", specifier = ">=0.4.0" },
{ name = "llama-index-vector-stores-faiss", specifier = ">=0.4.0" },
{ name = "matplotlib", marker = "extra == 'dev'" },
{ name = "mlx", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.3" },
{ name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.0" },
{ name = "msgpack", specifier = ">=1.1.1" },
@@ -2349,22 +2307,14 @@ requires-dist = [
{ name = "pandas", marker = "extra == 'documents'", specifier = ">=2.2.0" },
{ name = "pathspec", specifier = ">=0.12.1" },
{ name = "pdfplumber", specifier = ">=0.11.0" },
{ name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.5.0" },
{ name = "protobuf", specifier = "==4.25.3" },
{ name = "psutil", specifier = ">=5.8.0" },
{ name = "pybind11", specifier = ">=3.0.0" },
{ name = "pymupdf", specifier = ">=1.26.0" },
{ name = "pypdf2", specifier = ">=3.0.0" },
{ name = "pypdfium2", specifier = ">=4.30.0" },
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
{ name = "pytest", marker = "extra == 'test'", specifier = ">=7.0" },
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" },
{ name = "pytest-timeout", marker = "extra == 'test'", specifier = ">=2.0" },
{ name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.0" },
{ name = "python-docx", marker = "extra == 'documents'", specifier = ">=0.8.11" },
{ name = "python-dotenv", marker = "extra == 'test'", specifier = ">=1.0.0" },
{ name = "requests", specifier = ">=2.25.0" },
{ name = "ruff", marker = "extra == 'dev'", specifier = "==0.12.7" },
{ name = "sentence-transformers", specifier = ">=2.2.0" },
{ name = "sglang" },
{ name = "torch" },
@@ -2377,7 +2327,24 @@ requires-dist = [
{ name = "tree-sitter-typescript", specifier = ">=0.20.0" },
{ name = "typer", specifier = ">=0.12.3" },
]
provides-extras = ["dev", "test", "diskann", "documents"]
provides-extras = ["diskann", "documents"]
[package.metadata.requires-dev]
dev = [
{ name = "huggingface-hub", specifier = ">=0.20.0" },
{ name = "matplotlib" },
]
lint = [
{ name = "pre-commit", specifier = ">=3.5.0" },
{ name = "ruff", specifier = "==0.12.7" },
]
test = [
{ name = "pytest", specifier = ">=7.0" },
{ name = "pytest-cov", specifier = ">=4.0" },
{ name = "pytest-timeout", specifier = ">=2.0" },
{ name = "pytest-xdist", specifier = ">=3.0" },
{ name = "python-dotenv", specifier = ">=1.0.0" },
]
[[package]]
name = "llama-cloud"