diff --git a/.github/workflows/build-reusable.yml b/.github/workflows/build-reusable.yml index 5a5d645..97f076f 100644 --- a/.github/workflows/build-reusable.yml +++ b/.github/workflows/build-reusable.yml @@ -26,21 +26,14 @@ jobs: - name: Install uv uses: astral-sh/setup-uv@v4 - - name: Install pre-commit + - name: Sync lint tools via uv groups (tools only) run: | - uv pip install --system pre-commit + uv sync --only-group lint - name: Run pre-commit hooks (all files) run: | - pre-commit run --all-files --show-diff-on-failure + uv run pre-commit run --all-files --show-diff-on-failure - - name: Run ruff check - run: | - ruff check . - - - name: Run ruff format check - run: | - ruff format --check . build: needs: lint @@ -316,15 +309,15 @@ jobs: uv venv --python ${{ matrix.python }} source .venv/bin/activate || source .venv/Scripts/activate - # Install packages using --find-links to prioritize local builds + # Install test tools only first, without including the project itself + uv sync --only-group test + + # Now install the built wheels (these are the artifacts under test) uv pip install --find-links packages/leann-core/dist --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist packages/leann-core/dist/*.whl || uv pip install --find-links packages/leann-core/dist packages/leann-core/dist/*.tar.gz uv pip install --find-links packages/leann-core/dist packages/leann-backend-hnsw/dist/*.whl uv pip install --find-links packages/leann-core/dist packages/leann-backend-diskann/dist/*.whl uv pip install packages/leann/dist/*.whl || uv pip install packages/leann/dist/*.tar.gz - # Install test dependencies using extras - uv pip install -e ".[test]" - - name: Run tests with pytest env: CI: true diff --git a/.gitmodules b/.gitmodules index aa2e98e..52acdd5 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,5 +16,5 @@ url = https://github.com/zeromq/libzmq.git [submodule "packages/astchunk-leann"] path = packages/astchunk-leann - url = 
git@github.com:yichuan-w/astchunk-leann.git + url = https://github.com/yichuan-w/astchunk-leann.git branch = main diff --git a/README.md b/README.md index 90c0b33..044d894 100755 --- a/README.md +++ b/README.md @@ -706,9 +706,8 @@ results = searcher.search("banana‑crocodile", use_grep=True, top_k=1) ## Reproduce Our Results ```bash -uv pip install -e ".[dev]" # Install dev dependencies -python benchmarks/run_evaluation.py # Will auto-download evaluation data and run benchmarks -python benchmarks/run_evaluation.py benchmarks/data/indices/rpj_wiki/rpj_wiki --num-queries 2000 # After downloading data, you can run the benchmark with our biggest index +uv run benchmarks/run_evaluation.py # Will auto-download evaluation data and run benchmarks +uv run benchmarks/run_evaluation.py benchmarks/data/indices/rpj_wiki/rpj_wiki --num-queries 2000 # After downloading data, you can run the benchmark with our biggest index ``` The evaluation script downloads data automatically on first run. The last three results were tested with partial personal data, and you can reproduce them with your own data! diff --git a/benchmarks/run_evaluation.py b/benchmarks/run_evaluation.py index ab4e169..6c34f15 100644 --- a/benchmarks/run_evaluation.py +++ b/benchmarks/run_evaluation.py @@ -53,7 +53,7 @@ def download_data_if_needed(data_root: Path, download_embeddings: bool = False): print( "Error: huggingface_hub is not installed. Please install it to download the data:" ) - print("uv pip install -e '.[dev]'") + print("uv sync --only-group dev") sys.exit(1) except Exception as e: print(f"An error occurred during data download: {e}") diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md index 4a37e26..1431c7b 100644 --- a/docs/CONTRIBUTING.md +++ b/docs/CONTRIBUTING.md @@ -53,9 +53,9 @@ We use pre-commit hooks to ensure code quality and consistency. This runs automa ### Setup Pre-commit -1. **Install pre-commit** (already included when you run `uv sync`): +1. 
**Install pre-commit tools**: ```bash - uv pip install pre-commit + uv sync --only-group lint ``` 2. **Install the git hooks**: @@ -65,7 +65,7 @@ We use pre-commit hooks to ensure code quality and consistency. This runs automa 3. **Run pre-commit manually** (optional): ```bash - pre-commit run --all-files + uv run pre-commit run --all-files ``` ### Pre-commit Checks @@ -85,6 +85,9 @@ Our pre-commit configuration includes: ### Running Tests ```bash +# Install the project together with the test dependency group +uv sync --group test + # Run all tests uv run pytest diff --git a/packages/astchunk-leann b/packages/astchunk-leann index a453701..ad9afa0 160000 --- a/packages/astchunk-leann +++ b/packages/astchunk-leann @@ -1 +1 @@ -Subproject commit a4537018a329ba96f187b1d97c15abd1a04b8093 +Subproject commit ad9afa07b985e1faa5e24eecd9297a19064de31f diff --git a/pyproject.toml b/pyproject.toml index a0d83bf..ae65263 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -57,24 +57,6 @@ dependencies = [ ] [project.optional-dependencies] -dev = [ - "pytest>=7.0", - "pytest-cov>=4.0", - "pytest-xdist>=3.0", # For parallel test execution - "black>=23.0", - "ruff==0.12.7", # Fixed version to ensure consistent formatting across all environments - "matplotlib", - "huggingface-hub>=0.20.0", - "pre-commit>=3.5.0", -] - -test = [ - "pytest>=7.0", - "pytest-timeout>=2.0", - "llama-index-core>=0.12.0", - "python-dotenv>=1.0.0", -] - diskann = [ "leann-backend-diskann", ] @@ -102,6 +84,28 @@ leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = tr leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true } astchunk = { path = "packages/astchunk-leann", editable = true } +[dependency-groups] +# Minimal lint toolchain for CI and local hooks +lint = [ + "pre-commit>=3.5.0", + "ruff==0.12.7", # Fixed version to ensure consistent formatting across all environments +] + +# Test toolchain (no heavy project runtime deps) +test = [ + "pytest>=7.0", + "pytest-cov>=4.0", + 
"pytest-xdist>=3.0", + "pytest-timeout>=2.0", + "python-dotenv>=1.0.0", +] + +# Dependencies needed by apps/ should be listed here +dev = [ + "matplotlib", + "huggingface-hub>=0.20.0", +] + [tool.ruff] target-version = "py39" line-length = 100 diff --git a/tests/README.md b/tests/README.md index 4d56d10..d3b3ec9 100644 --- a/tests/README.md +++ b/tests/README.md @@ -40,8 +40,8 @@ Tests DiskANN graph partitioning functionality: ### Install test dependencies: ```bash -# Using extras -uv pip install -e ".[test]" +# Using uv dependency groups (tools only) +uv sync --only-group test ``` ### Run all tests: diff --git a/uv.lock b/uv.lock index 28c5824..d3522c9 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.9" resolution-markers = [ "python_full_version >= '3.12'", @@ -297,45 +297,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/cd/30110dc0ffcf3b131156077b90e9f60ed75711223f306da4db08eff8403b/beautifulsoup4-4.13.4-py3-none-any.whl", hash = "sha256:9bbbb14bfde9d79f38b8cd5f8c7c85f4b8f2523190ebed90e950a8dea4cb1c4b", size = 187285, upload-time = "2025-04-15T17:05:12.221Z" }, ] -[[package]] -name = "black" -version = "25.1.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click", version = "8.1.8", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "click", version = "8.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "mypy-extensions" }, - { name = "packaging" }, - { name = "pathspec" }, - { name = "platformdirs" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = 
"sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449, upload-time = "2025-01-29T04:15:40.373Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/4d/3b/4ba3f93ac8d90410423fdd31d7541ada9bcee1df32fb90d26de41ed40e1d/black-25.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759e7ec1e050a15f89b770cefbf91ebee8917aac5c20483bc2d80a6c3a04df32", size = 1629419, upload-time = "2025-01-29T05:37:06.642Z" }, - { url = "https://files.pythonhosted.org/packages/b4/02/0bde0485146a8a5e694daed47561785e8b77a0466ccc1f3e485d5ef2925e/black-25.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0e519ecf93120f34243e6b0054db49c00a35f84f195d5bce7e9f5cfc578fc2da", size = 1461080, upload-time = "2025-01-29T05:37:09.321Z" }, - { url = "https://files.pythonhosted.org/packages/52/0e/abdf75183c830eaca7589144ff96d49bce73d7ec6ad12ef62185cc0f79a2/black-25.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:055e59b198df7ac0b7efca5ad7ff2516bca343276c466be72eb04a3bcc1f82d7", size = 1766886, upload-time = "2025-01-29T04:18:24.432Z" }, - { url = "https://files.pythonhosted.org/packages/dc/a6/97d8bb65b1d8a41f8a6736222ba0a334db7b7b77b8023ab4568288f23973/black-25.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:db8ea9917d6f8fc62abd90d944920d95e73c83a5ee3383493e35d271aca872e9", size = 1419404, upload-time = "2025-01-29T04:19:04.296Z" }, - { url = "https://files.pythonhosted.org/packages/7e/4f/87f596aca05c3ce5b94b8663dbfe242a12843caaa82dd3f85f1ffdc3f177/black-25.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a39337598244de4bae26475f77dda852ea00a93bd4c728e09eacd827ec929df0", size = 1614372, upload-time = "2025-01-29T05:37:11.71Z" }, - { url = "https://files.pythonhosted.org/packages/e7/d0/2c34c36190b741c59c901e56ab7f6e54dad8df05a6272a9747ecef7c6036/black-25.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96c1c7cd856bba8e20094e36e0f948718dc688dba4a9d78c3adde52b9e6c2299", size = 1442865, 
upload-time = "2025-01-29T05:37:14.309Z" }, - { url = "https://files.pythonhosted.org/packages/21/d4/7518c72262468430ead45cf22bd86c883a6448b9eb43672765d69a8f1248/black-25.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bce2e264d59c91e52d8000d507eb20a9aca4a778731a08cfff7e5ac4a4bb7096", size = 1749699, upload-time = "2025-01-29T04:18:17.688Z" }, - { url = "https://files.pythonhosted.org/packages/58/db/4f5beb989b547f79096e035c4981ceb36ac2b552d0ac5f2620e941501c99/black-25.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:172b1dbff09f86ce6f4eb8edf9dede08b1fce58ba194c87d7a4f1a5aa2f5b3c2", size = 1428028, upload-time = "2025-01-29T04:18:51.711Z" }, - { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988, upload-time = "2025-01-29T05:37:16.707Z" }, - { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985, upload-time = "2025-01-29T05:37:18.273Z" }, - { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816, upload-time = "2025-01-29T04:18:33.823Z" }, - { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860, upload-time = "2025-01-29T04:19:12.944Z" }, - { url = 
"https://files.pythonhosted.org/packages/98/87/0edf98916640efa5d0696e1abb0a8357b52e69e82322628f25bf14d263d1/black-25.1.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8f0b18a02996a836cc9c9c78e5babec10930862827b1b724ddfe98ccf2f2fe4f", size = 1650673, upload-time = "2025-01-29T05:37:20.574Z" }, - { url = "https://files.pythonhosted.org/packages/52/e5/f7bf17207cf87fa6e9b676576749c6b6ed0d70f179a3d812c997870291c3/black-25.1.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:afebb7098bfbc70037a053b91ae8437c3857482d3a690fefc03e9ff7aa9a5fd3", size = 1453190, upload-time = "2025-01-29T05:37:22.106Z" }, - { url = "https://files.pythonhosted.org/packages/e3/ee/adda3d46d4a9120772fae6de454c8495603c37c4c3b9c60f25b1ab6401fe/black-25.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:030b9759066a4ee5e5aca28c3c77f9c64789cdd4de8ac1df642c40b708be6171", size = 1782926, upload-time = "2025-01-29T04:18:58.564Z" }, - { url = "https://files.pythonhosted.org/packages/cc/64/94eb5f45dcb997d2082f097a3944cfc7fe87e071907f677e80788a2d7b7a/black-25.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:a22f402b410566e2d1c950708c77ebf5ebd5d0d88a6a2e87c86d9fb48afa0d18", size = 1442613, upload-time = "2025-01-29T04:19:27.63Z" }, - { url = "https://files.pythonhosted.org/packages/d3/b6/ae7507470a4830dbbfe875c701e84a4a5fb9183d1497834871a715716a92/black-25.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1ee0a0c330f7b5130ce0caed9936a904793576ef4d2b98c40835d6a65afa6a0", size = 1628593, upload-time = "2025-01-29T05:37:23.672Z" }, - { url = "https://files.pythonhosted.org/packages/24/c1/ae36fa59a59f9363017ed397750a0cd79a470490860bc7713967d89cdd31/black-25.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:f3df5f1bf91d36002b0a75389ca8663510cf0531cca8aa5c1ef695b46d98655f", size = 1460000, upload-time = "2025-01-29T05:37:25.829Z" }, - { url = 
"https://files.pythonhosted.org/packages/ac/b6/98f832e7a6c49aa3a464760c67c7856363aa644f2f3c74cf7d624168607e/black-25.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9e6827d563a2c820772b32ce8a42828dc6790f095f441beef18f96aa6f8294e", size = 1765963, upload-time = "2025-01-29T04:18:38.116Z" }, - { url = "https://files.pythonhosted.org/packages/ce/e9/2cb0a017eb7024f70e0d2e9bdb8c5a5b078c5740c7f8816065d06f04c557/black-25.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:bacabb307dca5ebaf9c118d2d2f6903da0d62c9faa82bd21a33eecc319559355", size = 1419419, upload-time = "2025-01-29T04:18:30.191Z" }, - { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646, upload-time = "2025-01-29T04:15:38.082Z" }, -] - [[package]] name = "bleach" version = "6.2.0" @@ -2290,17 +2251,6 @@ dependencies = [ ] [package.optional-dependencies] -dev = [ - { name = "black" }, - { name = "huggingface-hub" }, - { name = "matplotlib", version = "3.9.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, - { name = "matplotlib", version = "3.10.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, - { name = "pre-commit" }, - { name = "pytest" }, - { name = "pytest-cov" }, - { name = "pytest-xdist" }, - { name = "ruff" }, -] diskann = [ { name = "leann-backend-diskann" }, ] @@ -2310,10 +2260,22 @@ documents = [ { name = "pandas" }, { name = "python-docx" }, ] + +[package.dev-dependencies] +dev = [ + { name = "huggingface-hub" }, + { name = "matplotlib", version = "3.9.4", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "matplotlib", version = "3.10.5", source = { registry = "https://pypi.org/simple" }, marker = 
"python_full_version >= '3.10'" }, +] +lint = [ + { name = "pre-commit" }, + { name = "ruff" }, +] test = [ - { name = "llama-index-core" }, { name = "pytest" }, + { name = "pytest-cov" }, { name = "pytest-timeout" }, + { name = "pytest-xdist" }, { name = "python-dotenv" }, ] @@ -2321,23 +2283,19 @@ test = [ requires-dist = [ { name = "astchunk", editable = "packages/astchunk-leann" }, { name = "beautifulsoup4", marker = "extra == 'documents'", specifier = ">=4.13.0" }, - { name = "black", marker = "extra == 'dev'", specifier = ">=23.0" }, { name = "boto3" }, { name = "colorama" }, { name = "datasets", specifier = ">=2.15.0" }, { name = "evaluate" }, { name = "gitignore-parser", specifier = ">=0.1.12" }, - { name = "huggingface-hub", marker = "extra == 'dev'", specifier = ">=0.20.0" }, { name = "ipykernel", specifier = "==6.29.5" }, { name = "leann-backend-diskann", marker = "extra == 'diskann'", editable = "packages/leann-backend-diskann" }, { name = "leann-backend-hnsw", editable = "packages/leann-backend-hnsw" }, { name = "leann-core", editable = "packages/leann-core" }, { name = "llama-index", specifier = ">=0.12.44" }, - { name = "llama-index-core", marker = "extra == 'test'", specifier = ">=0.12.0" }, { name = "llama-index-embeddings-huggingface", specifier = ">=0.5.5" }, { name = "llama-index-readers-file", specifier = ">=0.4.0" }, { name = "llama-index-vector-stores-faiss", specifier = ">=0.4.0" }, - { name = "matplotlib", marker = "extra == 'dev'" }, { name = "mlx", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.3" }, { name = "mlx-lm", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'", specifier = ">=0.26.0" }, { name = "msgpack", specifier = ">=1.1.1" }, @@ -2349,22 +2307,14 @@ requires-dist = [ { name = "pandas", marker = "extra == 'documents'", specifier = ">=2.2.0" }, { name = "pathspec", specifier = ">=0.12.1" }, { name = "pdfplumber", specifier = ">=0.11.0" }, - { name = "pre-commit", 
marker = "extra == 'dev'", specifier = ">=3.5.0" }, { name = "protobuf", specifier = "==4.25.3" }, { name = "psutil", specifier = ">=5.8.0" }, { name = "pybind11", specifier = ">=3.0.0" }, { name = "pymupdf", specifier = ">=1.26.0" }, { name = "pypdf2", specifier = ">=3.0.0" }, { name = "pypdfium2", specifier = ">=4.30.0" }, - { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" }, - { name = "pytest", marker = "extra == 'test'", specifier = ">=7.0" }, - { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0" }, - { name = "pytest-timeout", marker = "extra == 'test'", specifier = ">=2.0" }, - { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.0" }, { name = "python-docx", marker = "extra == 'documents'", specifier = ">=0.8.11" }, - { name = "python-dotenv", marker = "extra == 'test'", specifier = ">=1.0.0" }, { name = "requests", specifier = ">=2.25.0" }, - { name = "ruff", marker = "extra == 'dev'", specifier = "==0.12.7" }, { name = "sentence-transformers", specifier = ">=2.2.0" }, { name = "sglang" }, { name = "torch" }, @@ -2377,7 +2327,24 @@ requires-dist = [ { name = "tree-sitter-typescript", specifier = ">=0.20.0" }, { name = "typer", specifier = ">=0.12.3" }, ] -provides-extras = ["dev", "test", "diskann", "documents"] +provides-extras = ["diskann", "documents"] + +[package.metadata.requires-dev] +dev = [ + { name = "huggingface-hub", specifier = ">=0.20.0" }, + { name = "matplotlib" }, +] +lint = [ + { name = "pre-commit", specifier = ">=3.5.0" }, + { name = "ruff", specifier = "==0.12.7" }, +] +test = [ + { name = "pytest", specifier = ">=7.0" }, + { name = "pytest-cov", specifier = ">=4.0" }, + { name = "pytest-timeout", specifier = ">=2.0" }, + { name = "pytest-xdist", specifier = ">=3.0" }, + { name = "python-dotenv", specifier = ">=1.0.0" }, +] [[package]] name = "llama-cloud"