From 7add391b2cc702b7437a7d6578bd3ff58be5573c Mon Sep 17 00:00:00 2001
From: Andy Lee
Date: Thu, 24 Jul 2025 00:47:46 -0700
Subject: [PATCH] chore: build and package

---
 .github/workflows/build-and-publish.yml       | 256 ++++++++++++++++++
 docs/RELEASE.md                               |  54 ++++
 .../leann-backend-diskann/third_party/DiskANN |   2 +-
 packages/leann-backend-hnsw/pyproject.toml    |   7 +-
 packages/leann-core/pyproject.toml            |  12 +-
 packages/leann/README.md                      |  40 +++
 packages/leann/__init__.py                    |  12 +
 packages/leann/pyproject.toml                 |  42 +++
 8 files changed, 421 insertions(+), 4 deletions(-)
 create mode 100644 .github/workflows/build-and-publish.yml
 create mode 100644 docs/RELEASE.md
 create mode 100644 packages/leann/README.md
 create mode 100644 packages/leann/__init__.py
 create mode 100644 packages/leann/pyproject.toml

diff --git a/.github/workflows/build-and-publish.yml b/.github/workflows/build-and-publish.yml
new file mode 100644
index 0000000..43a6b15
--- /dev/null
+++ b/.github/workflows/build-and-publish.yml
@@ -0,0 +1,256 @@
+name: Build and Publish to PyPI
+
+on:
+  release:
+    types: [published]
+  push:
+    tags:
+      - 'v*'
+  workflow_dispatch:
+    inputs:
+      publish:
+        description: 'Publish to PyPI'
+        required: true
+        default: 'false'
+        type: choice
+        options:
+          - 'false'
+          - 'test'
+          - 'prod'
+
+jobs:
+  # Build pure Python package: leann-core
+  build-core:
+    name: Build leann-core
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install build dependencies
+        run: |
+          uv pip install --system build twine
+
+      - name: Build package
+        run: |
+          cd packages/leann-core
+          uv build
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: leann-core-dist
+          path: packages/leann-core/dist/
+
+  # Build binary package: leann-backend-hnsw (default backend)
+  build-hnsw:
+    name: Build leann-backend-hnsw
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install system dependencies (Ubuntu)
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libomp-dev libboost-all-dev libzmq3-dev \
+            pkg-config patchelf
+
+      - name: Install system dependencies (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          brew install libomp boost zeromq
+
+      - name: Install build dependencies
+        run: |
+          uv pip install --system scikit-build-core numpy swig
+          uv pip install --system auditwheel delocate
+
+      - name: Build wheel
+        run: |
+          cd packages/leann-backend-hnsw
+          uv build --wheel --out-dir dist --python "${{ matrix.python-version }}"  # "uv pip wheel" is not a uv command
+
+      - name: Repair wheel (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          cd packages/leann-backend-hnsw
+          auditwheel repair dist/*.whl -w dist_repaired
+          rm -rf dist
+          mv dist_repaired dist
+
+      - name: Repair wheel (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          cd packages/leann-backend-hnsw
+          delocate-wheel -w dist_repaired -v dist/*.whl
+          rm -rf dist
+          mv dist_repaired dist
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: hnsw-${{ matrix.os }}-py${{ matrix.python-version }}
+          path: packages/leann-backend-hnsw/dist/
+
+  # Build binary package: leann-backend-diskann (multi-platform)
+  build-diskann:
+    name: Build leann-backend-diskann
+    strategy:
+      matrix:
+        os: [ubuntu-latest, macos-latest]
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: recursive
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install system dependencies (Ubuntu)
+        if: runner.os == 'Linux'
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y libomp-dev libboost-all-dev libaio-dev libzmq3-dev \
+            pkg-config protobuf-compiler libprotobuf-dev libabsl-dev patchelf
+
+          # Install Intel MKL using Intel's installer
+          wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
+          sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
+          source /opt/intel/oneapi/setvars.sh
+          echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV
+          echo "LD_LIBRARY_PATH=/opt/intel/oneapi/mkl/latest/lib/intel64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
+
+      - name: Install system dependencies (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          brew install libomp boost zeromq
+          # MKL is not available on Homebrew, but DiskANN can work without it
+
+      - name: Install build dependencies
+        run: |
+          uv pip install --system scikit-build-core numpy Cython pybind11
+          if [[ "$RUNNER_OS" == "Linux" ]]; then
+            uv pip install --system auditwheel
+          else
+            uv pip install --system delocate
+          fi
+
+      - name: Build wheel
+        run: |
+          cd packages/leann-backend-diskann
+          uv build --wheel --out-dir dist --python "${{ matrix.python-version }}"  # "uv pip wheel" is not a uv command
+
+      - name: Repair wheel (Linux)
+        if: runner.os == 'Linux'
+        run: |
+          cd packages/leann-backend-diskann
+          auditwheel repair dist/*.whl -w dist_repaired
+          rm -rf dist
+          mv dist_repaired dist
+
+      - name: Repair wheel (macOS)
+        if: runner.os == 'macOS'
+        run: |
+          cd packages/leann-backend-diskann
+          delocate-wheel -w dist_repaired -v dist/*.whl
+          rm -rf dist
+          mv dist_repaired dist
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: diskann-${{ matrix.os }}-py${{ matrix.python-version }}
+          path: packages/leann-backend-diskann/dist/
+
+  # Build meta-package: leann (build last)
+  build-meta:
+    name: Build leann meta-package
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Install build dependencies
+        run: |
+          uv pip install --system build
+
+      - name: Build package
+        run: |
+          cd packages/leann
+          uv build
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: leann-meta-dist
+          path: packages/leann/dist/
+
+  # Publish to PyPI
+  publish:
+    name: Publish to PyPI
+    needs: [build-core, build-hnsw, build-diskann, build-meta]
+    runs-on: ubuntu-latest
+    if: github.event_name == 'release' || github.event.inputs.publish != 'false'  # tag pushes pass too (inputs are empty); the upload steps below do the real gating
+
+    steps:
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: dist
+
+      - name: Flatten directory structure
+        run: |
+          mkdir -p all_wheels
+          find dist -name "*.whl" -exec cp {} all_wheels/ \;
+          find dist -name "*.tar.gz" -exec cp {} all_wheels/ \;
+
+      - name: Publish to Test PyPI
+        if: github.event.inputs.publish == 'test'  # a 'prod' run must not also upload to Test PyPI (a duplicate there would block the real upload)
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
+          repository-url: https://test.pypi.org/legacy/
+          packages-dir: all_wheels/
+
+      - name: Publish to PyPI
+        if: github.event_name == 'release' || github.event.inputs.publish == 'prod'
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
+          packages-dir: all_wheels/
\ No newline at end of file
diff --git a/docs/RELEASE.md b/docs/RELEASE.md
new file mode 100644
index 0000000..72bb251
--- /dev/null
+++ b/docs/RELEASE.md
@@ -0,0 +1,54 @@
+# Release Guide
+
+## One-line Release 🚀
+
+```bash
+./scripts/release.sh 0.1.1
+```
+
+That's it! This script will:
+1. Update all package versions
+2. Commit and push changes
+3. Create GitHub release
+4. CI automatically builds and publishes to PyPI
+
+## Manual Testing Before Release
+
+For testing specific packages locally (especially DiskANN on macOS):
+
+```bash
+# Build specific package locally
+./scripts/build_and_test.sh diskann  # or hnsw, core, meta, all
+
+# Test installation in a clean environment
+python -m venv test_env
+source test_env/bin/activate
+pip install packages/*/dist/*.whl
+
+# Upload to Test PyPI (optional)
+./scripts/upload_to_pypi.sh test
+
+# Upload to Production PyPI (use with caution)
+./scripts/upload_to_pypi.sh prod
+```
+
+### Why Manual Build for DiskANN?
+
+DiskANN's complex dependencies (protobuf, abseil, etc.) sometimes require local testing before release. The build script will:
+- Compile the C++ extension
+- Use `delocate` (macOS) or `auditwheel` (Linux) to bundle system libraries
+- Create a self-contained wheel with no external dependencies
+
+## First-time setup
+
+1. Install GitHub CLI:
+   ```bash
+   brew install gh
+   gh auth login
+   ```
+
+2. 
Set the PyPI tokens in GitHub (`PYPI_API_TOKEN` for PyPI; the workflow also reads `TEST_PYPI_API_TOKEN` for Test PyPI uploads): + ```bash + gh secret set PYPI_API_TOKEN + # Paste your PyPI token when prompted + ``` \ No newline at end of file diff --git a/packages/leann-backend-diskann/third_party/DiskANN b/packages/leann-backend-diskann/third_party/DiskANN index af2a264..25339b0 160000 --- a/packages/leann-backend-diskann/third_party/DiskANN +++ b/packages/leann-backend-diskann/third_party/DiskANN @@ -1 +1 @@ -Subproject commit af2a26481e65232b57b82d96e68833cdee9f7635 +Subproject commit 25339b03413b5067c25b6092ea3e0f77ef8515c8 diff --git a/packages/leann-backend-hnsw/pyproject.toml b/packages/leann-backend-hnsw/pyproject.toml index 274f2b4..3aabd89 100644 --- a/packages/leann-backend-hnsw/pyproject.toml +++ b/packages/leann-backend-hnsw/pyproject.toml @@ -8,7 +8,12 @@ build-backend = "scikit_build_core.build" name = "leann-backend-hnsw" version = "0.1.0" description = "Custom-built HNSW (Faiss) backend for the Leann toolkit." -dependencies = ["leann-core==0.1.0", "numpy"] +dependencies = [ + "leann-core==0.1.0", + "numpy", + "pyzmq>=23.0.0", + "msgpack>=1.0.0", +] [tool.scikit-build] wheel.packages = ["leann_backend_hnsw"] diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml index 08d2b4e..6a3606a 100644 --- a/packages/leann-core/pyproject.toml +++ b/packages/leann-core/pyproject.toml @@ -5,14 +5,22 @@ build-backend = "setuptools.build_meta" [project] name = "leann-core" version = "0.1.0" -description = "Core API and plugin system for Leann." 
+description = "Core API and plugin system for LEANN" readme = "README.md" requires-python = ">=3.9" license = { text = "MIT" } +# All required dependencies included dependencies = [ "numpy>=1.20.0", - "tqdm>=4.60.0" + "tqdm>=4.60.0", + "psutil>=5.8.0", + "pyzmq>=23.0.0", + "msgpack>=1.0.0", + "torch>=2.0.0", + "sentence-transformers>=2.2.0", + "llama-index-core>=0.12.0", + "python-dotenv>=1.0.0", ] [project.scripts] diff --git a/packages/leann/README.md b/packages/leann/README.md new file mode 100644 index 0000000..a1e831f --- /dev/null +++ b/packages/leann/README.md @@ -0,0 +1,40 @@ +# LEANN - The smallest vector index in the world + +LEANN is a revolutionary vector database that democratizes personal AI. Transform your laptop into a powerful RAG system that can index and search through millions of documents while using **97% less storage** than traditional solutions **without accuracy loss**. + +## Installation + +```bash +# Default installation (HNSW backend, recommended) +uv pip install leann + +# With DiskANN backend (for large-scale deployments) +uv pip install "leann[diskann]" +``` + +## Quick Start + +```python +from leann import LeannBuilder, LeannSearcher, LeannChat + +# Build an index +builder = LeannBuilder(backend_name="hnsw") +builder.add_text("LEANN saves 97% storage compared to traditional vector databases.") +builder.build_index("my_index.leann") + +# Search +searcher = LeannSearcher("my_index.leann") +results = searcher.search("storage savings", top_k=3) + +# Chat with your data +chat = LeannChat("my_index.leann", llm_config={"type": "ollama", "model": "llama3.2:1b"}) +response = chat.ask("How much storage does LEANN save?") +``` + +## Documentation + +For full documentation, visit [https://leann.readthedocs.io](https://leann.readthedocs.io) + +## License + +MIT License \ No newline at end of file diff --git a/packages/leann/__init__.py b/packages/leann/__init__.py new file mode 100644 index 0000000..a629a58 --- /dev/null +++ 
b/packages/leann/__init__.py @@ -0,0 +1,12 @@ +""" +LEANN - Low-storage Embedding Approximation for Neural Networks + +A revolutionary vector database that democratizes personal AI. +""" + +__version__ = "0.1.0" + +# Re-export main API from leann-core +from leann_core import LeannBuilder, LeannSearcher, LeannChat + +__all__ = ["LeannBuilder", "LeannSearcher", "LeannChat"] diff --git a/packages/leann/pyproject.toml b/packages/leann/pyproject.toml new file mode 100644 index 0000000..54a470c --- /dev/null +++ b/packages/leann/pyproject.toml @@ -0,0 +1,42 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "leann" +version = "0.1.0" +description = "LEANN - The smallest vector index in the world. RAG Everything with LEANN!" +readme = "README.md" +requires-python = ">=3.9" +license = { text = "MIT" } +authors = [ + { name = "LEANN Team" } +] +keywords = ["vector-database", "rag", "embeddings", "search", "ai"] +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +# Default installation: core + hnsw +dependencies = [ + "leann-core>=0.1.0", + "leann-backend-hnsw>=0.1.0", +] + +[project.optional-dependencies] +diskann = [ + "leann-backend-diskann>=0.1.0", +] + +[project.urls] +Homepage = "https://github.com/yourusername/leann" +Documentation = "https://leann.readthedocs.io" +Repository = "https://github.com/yourusername/leann" +Issues = "https://github.com/yourusername/leann/issues" \ No newline at end of file