diff --git a/.github/workflows/build-reusable.yml b/.github/workflows/build-reusable.yml index 22b7b7f..06ac31a 100644 --- a/.github/workflows/build-reusable.yml +++ b/.github/workflows/build-reusable.yml @@ -54,16 +54,26 @@ jobs: python: '3.12' - os: ubuntu-22.04 python: '3.13' - - os: macos-latest + - os: macos-14 python: '3.9' - - os: macos-latest + - os: macos-14 python: '3.10' - - os: macos-latest + - os: macos-14 python: '3.11' - - os: macos-latest + - os: macos-14 python: '3.12' - - os: macos-latest + - os: macos-14 python: '3.13' + - os: macos-13 + python: '3.9' + - os: macos-13 + python: '3.10' + - os: macos-13 + python: '3.11' + - os: macos-13 + python: '3.12' + # Note: macos-13 + Python 3.13 excluded due to PyTorch compatibility + # (PyTorch 2.5+ supports Python 3.13 but not Intel Mac x86_64) runs-on: ${{ matrix.os }} steps: @@ -109,48 +119,59 @@ jobs: uv pip install --system delocate fi + - name: Set macOS environment variables + if: runner.os == 'macOS' + run: | + # Use brew --prefix to automatically detect Homebrew installation path + HOMEBREW_PREFIX=$(brew --prefix) + echo "HOMEBREW_PREFIX=${HOMEBREW_PREFIX}" >> $GITHUB_ENV + echo "OpenMP_ROOT=${HOMEBREW_PREFIX}/opt/libomp" >> $GITHUB_ENV + + # Set CMAKE_PREFIX_PATH to let CMake find all packages automatically + echo "CMAKE_PREFIX_PATH=${HOMEBREW_PREFIX}" >> $GITHUB_ENV + + # Set compiler flags for OpenMP (required for both backends) + echo "LDFLAGS=-L${HOMEBREW_PREFIX}/opt/libomp/lib" >> $GITHUB_ENV + echo "CPPFLAGS=-I${HOMEBREW_PREFIX}/opt/libomp/include" >> $GITHUB_ENV + - name: Build packages run: | # Build core (platform independent) - if [[ "${{ matrix.os }}" == ubuntu-* ]]; then - cd packages/leann-core - uv build - cd ../.. - fi + cd packages/leann-core + uv build + cd ../.. # Build HNSW backend cd packages/leann-backend-hnsw - if [ "${{ matrix.os }}" == "macos-latest" ]; then - # Use system clang instead of homebrew LLVM for better compatibility + if [[ "${{ matrix.os }}" == macos-* ]]; then + # Use system clang for better compatibility export CC=clang export CXX=clang++ export MACOSX_DEPLOYMENT_TARGET=11.0 - uv build --wheel --python python + uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist else - uv build --wheel --python python + uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist fi cd ../.. # Build DiskANN backend cd packages/leann-backend-diskann - if [ "${{ matrix.os }}" == "macos-latest" ]; then - # Use system clang instead of homebrew LLVM for better compatibility + if [[ "${{ matrix.os }}" == macos-* ]]; then + # Use system clang for better compatibility export CC=clang export CXX=clang++ # DiskANN requires macOS 13.3+ for sgesdd_ LAPACK function export MACOSX_DEPLOYMENT_TARGET=13.3 - uv build --wheel --python python + uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist else - uv build --wheel --python python + uv build --wheel --python ${{ matrix.python }} --find-links ${GITHUB_WORKSPACE}/packages/leann-core/dist fi cd ../.. # Build meta package (platform independent) - if [[ "${{ matrix.os }}" == ubuntu-* ]]; then - cd packages/leann - uv build - cd ../.. - fi + cd packages/leann + uv build + cd ../.. - name: Repair wheels (Linux) if: runner.os == 'Linux' @@ -199,20 +220,18 @@ jobs: echo "📦 Built packages:" find packages/*/dist -name "*.whl" -o -name "*.tar.gz" | sort + - name: Install built packages for testing run: | - # Create a virtual environment - uv venv + # Create a virtual environment with the correct Python version + uv venv --python ${{ matrix.python }} source .venv/bin/activate || source .venv/Scripts/activate - # Install the built wheels - # Use --find-links to let uv choose the correct wheel for the platform - if [[ "${{ matrix.os }}" == ubuntu-* ]]; then - uv pip install leann-core --find-links packages/leann-core/dist - uv pip install leann --find-links packages/leann/dist - fi - uv pip install leann-backend-hnsw --find-links packages/leann-backend-hnsw/dist - uv pip install leann-backend-diskann --find-links packages/leann-backend-diskann/dist + # Install packages using --find-links to prioritize local builds + uv pip install --find-links packages/leann-core/dist --find-links packages/leann-backend-hnsw/dist --find-links packages/leann-backend-diskann/dist packages/leann-core/dist/*.whl || uv pip install --find-links packages/leann-core/dist packages/leann-core/dist/*.tar.gz + uv pip install --find-links packages/leann-core/dist packages/leann-backend-hnsw/dist/*.whl + uv pip install --find-links packages/leann-core/dist packages/leann-backend-diskann/dist/*.whl + uv pip install packages/leann/dist/*.whl || uv pip install packages/leann/dist/*.tar.gz # Install test dependencies using extras uv pip install -e ".[test]" diff --git a/README.md b/README.md index d53f818..9b803fc 100755 --- a/README.md +++ b/README.md @@ -3,10 +3,11 @@

- Python 3.9+ + Python Versions + CI Status + Platform MIT License - Platform - MCP Integration + MCP Integration

diff --git a/packages/leann-backend-diskann/CMakeLists.txt b/packages/leann-backend-diskann/CMakeLists.txt deleted file mode 100644 index 2638282..0000000 --- a/packages/leann-backend-diskann/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -# packages/leann-backend-diskann/CMakeLists.txt (simplified version) - -cmake_minimum_required(VERSION 3.20) -project(leann_backend_diskann_wrapper) - -# Tell CMake to directly enter the DiskANN submodule and execute its own CMakeLists.txt -# DiskANN will handle everything itself, including compiling Python bindings -add_subdirectory(src/third_party/DiskANN) diff --git a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py index 926a26e..ba5c0ad 100644 --- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py +++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_backend.py @@ -4,7 +4,7 @@ import os import struct import sys from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Optional import numpy as np import psutil @@ -259,7 +259,7 @@ class DiskannSearcher(BaseSearcher): prune_ratio: float = 0.0, recompute_embeddings: bool = False, pruning_strategy: Literal["global", "local", "proportional"] = "global", - zmq_port: int | None = None, + zmq_port: Optional[int] = None, batch_recompute: bool = False, dedup_node_dis: bool = False, **kwargs, diff --git a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py index 1928dc8..749d607 100644 --- a/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py +++ b/packages/leann-backend-diskann/leann_backend_diskann/diskann_embedding_server.py @@ -10,6 +10,7 @@ import sys import threading import time from pathlib import Path +from typing import Optional import numpy as np import zmq @@ -32,7 +33,7 @@ if not logger.handlers: def create_diskann_embedding_server( - passages_file: str | None = None, + passages_file: Optional[str] = None, zmq_port: int = 5555, model_name: str = "sentence-transformers/all-mpnet-base-v2", embedding_mode: str = "sentence-transformers", diff --git a/packages/leann-backend-diskann/pyproject.toml b/packages/leann-backend-diskann/pyproject.toml index 6b33379..055a1e7 100644 --- a/packages/leann-backend-diskann/pyproject.toml +++ b/packages/leann-backend-diskann/pyproject.toml @@ -17,3 +17,5 @@ editable.mode = "redirect" cmake.build-type = "Release" build.verbose = true build.tool-args = ["-j8"] +# Let CMake find packages via Homebrew prefix +cmake.define = {CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"}, OpenMP_ROOT = {env = "OpenMP_ROOT"}} diff --git a/packages/leann-backend-diskann/third_party/DiskANN b/packages/leann-backend-diskann/third_party/DiskANN index b2dc4ea..04048bb 160000 --- a/packages/leann-backend-diskann/third_party/DiskANN +++ b/packages/leann-backend-diskann/third_party/DiskANN @@ -1 +1 @@ -Subproject commit b2dc4ea2c7e52e8a6481d3ba10003e192192a7b7 +Subproject commit 04048bb302a5d032371326a6526ff5410f8ffdab diff --git a/packages/leann-backend-hnsw/CMakeLists.txt b/packages/leann-backend-hnsw/CMakeLists.txt index 1f41393..651792c 100644 --- a/packages/leann-backend-hnsw/CMakeLists.txt +++ b/packages/leann-backend-hnsw/CMakeLists.txt @@ -5,11 +5,20 @@ set(CMAKE_CXX_COMPILER_WORKS 1) # Set OpenMP path for macOS if(APPLE) - set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include") - set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I/opt/homebrew/opt/libomp/include") + # Detect Homebrew installation path (Apple Silicon vs Intel) + if(EXISTS "/opt/homebrew/opt/libomp") + set(HOMEBREW_PREFIX "/opt/homebrew") + elseif(EXISTS "/usr/local/opt/libomp") + set(HOMEBREW_PREFIX "/usr/local") + else() + message(FATAL_ERROR "Could not find libomp installation. Please install with: brew install libomp") + endif() + + set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include") + set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_PREFIX}/opt/libomp/include") set(OpenMP_C_LIB_NAMES "omp") set(OpenMP_CXX_LIB_NAMES "omp") - set(OpenMP_omp_LIBRARY "/opt/homebrew/opt/libomp/lib/libomp.dylib") + set(OpenMP_omp_LIBRARY "${HOMEBREW_PREFIX}/opt/libomp/lib/libomp.dylib") # Force use of system libc++ to avoid version mismatch set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py index d6b87f9..1d5f635 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py @@ -2,7 +2,7 @@ import logging import os import shutil from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Optional import numpy as np from leann.interface import ( @@ -152,7 +152,7 @@ class HNSWSearcher(BaseSearcher): self, query: np.ndarray, top_k: int, - zmq_port: int | None = None, + zmq_port: Optional[int] = None, complexity: int = 64, beam_width: int = 1, prune_ratio: float = 0.0, diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py index e9c246c..013ae5a 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py @@ -10,6 +10,7 @@ import sys import threading import time from pathlib import Path +from typing import Union import msgpack import numpy as np @@ -33,7 +34,7 @@ if not logger.handlers: def create_hnsw_embedding_server( - passages_file: str | None = None, + passages_file: Union[str, None] = None, zmq_port: int = 5555, model_name: str = "sentence-transformers/all-mpnet-base-v2", distance_metric: str = "mips", diff --git a/packages/leann-backend-hnsw/pyproject.toml b/packages/leann-backend-hnsw/pyproject.toml index d178c11..c3657e6 100644 --- a/packages/leann-backend-hnsw/pyproject.toml +++ b/packages/leann-backend-hnsw/pyproject.toml @@ -22,6 +22,8 @@ cmake.build-type = "Release" build.verbose = true build.tool-args = ["-j8"] -# CMake definitions to optimize compilation +# CMake definitions to optimize compilation and find Homebrew packages [tool.scikit-build.cmake.define] CMAKE_BUILD_PARALLEL_LEVEL = "8" +CMAKE_PREFIX_PATH = {env = "CMAKE_PREFIX_PATH"} +OpenMP_ROOT = {env = "OpenMP_ROOT"} diff --git a/packages/leann-backend-hnsw/third_party/faiss b/packages/leann-backend-hnsw/third_party/faiss index ff22e2c..4a2c0d6 160000 --- a/packages/leann-backend-hnsw/third_party/faiss +++ b/packages/leann-backend-hnsw/third_party/faiss @@ -1 +1 @@ -Subproject commit ff22e2c86be1784c760265abe146b1ab0db90ebe +Subproject commit 4a2c0d67d37a6f27c9a1cd695a3d703dcce73bad diff --git a/packages/leann-core/pyproject.toml b/packages/leann-core/pyproject.toml index db7259b..98e7d12 100644 --- a/packages/leann-core/pyproject.toml +++ b/packages/leann-core/pyproject.toml @@ -33,8 +33,8 @@ dependencies = [ "pdfplumber>=0.10.0", "nbconvert>=7.0.0", # For .ipynb file support "gitignore-parser>=0.1.12", # For proper .gitignore handling - "mlx>=0.26.3; sys_platform == 'darwin'", - "mlx-lm>=0.26.0; sys_platform == 'darwin'", + "mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'", + "mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'", ] [project.optional-dependencies] diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py index 0ae40af..65d75b9 100644 --- a/packages/leann-core/src/leann/api.py +++ b/packages/leann-core/src/leann/api.py @@ -10,7 +10,7 @@ import time import warnings from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Optional import numpy as np @@ -33,7 +33,7 @@ def compute_embeddings( model_name: str, mode: str = "sentence-transformers", use_server: bool = True, - port: int | None = None, + port: Optional[int] = None, is_build=False, ) -> np.ndarray: """ @@ -157,12 +157,12 @@ class LeannBuilder: self, backend_name: str, embedding_model: str = "facebook/contriever", - dimensions: int | None = None, + dimensions: Optional[int] = None, embedding_mode: str = "sentence-transformers", **backend_kwargs, ): self.backend_name = backend_name - backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(backend_name) + backend_factory: Optional[LeannBackendFactoryInterface] = BACKEND_REGISTRY.get(backend_name) if backend_factory is None: raise ValueError(f"Backend '{backend_name}' not found or not registered.") self.backend_factory = backend_factory @@ -242,7 +242,7 @@ class LeannBuilder: self.backend_kwargs = backend_kwargs self.chunks: list[dict[str, Any]] = [] - def add_text(self, text: str, metadata: dict[str, Any] | None = None): + def add_text(self, text: str, metadata: Optional[dict[str, Any]] = None): if metadata is None: metadata = {} passage_id = metadata.get("id", str(len(self.chunks))) @@ -554,7 +554,7 @@ class LeannSearcher: if "labels" in results and "distances" in results: logger.info(f" Processing {len(results['labels'][0])} passage IDs:") for i, (string_id, dist) in enumerate( - zip(results["labels"][0], results["distances"][0], strict=False) + zip(results["labels"][0], results["distances"][0]) ): try: passage_data = self.passage_manager.get_passage(string_id) @@ -592,7 +592,7 @@ class LeannChat: def __init__( self, index_path: str, - llm_config: dict[str, Any] | None = None, + llm_config: Optional[dict[str, Any]] = None, enable_warmup: bool = False, **kwargs, ): @@ -608,7 +608,7 @@ class LeannChat: prune_ratio: float = 0.0, recompute_embeddings: bool = True, pruning_strategy: Literal["global", "local", "proportional"] = "global", - llm_kwargs: dict[str, Any] | None = None, + llm_kwargs: Optional[dict[str, Any]] = None, expected_zmq_port: int = 5557, **search_kwargs, ): diff --git a/packages/leann-core/src/leann/chat.py b/packages/leann-core/src/leann/chat.py index 3a5acb1..665e1bd 100644 --- a/packages/leann-core/src/leann/chat.py +++ b/packages/leann-core/src/leann/chat.py @@ -8,7 +8,7 @@ import difflib import logging import os from abc import ABC, abstractmethod -from typing import Any +from typing import Any, Optional import torch @@ -311,7 +311,7 @@ def search_hf_models(query: str, limit: int = 10) -> list[str]: def validate_model_and_suggest( model_name: str, llm_type: str, host: str = "http://localhost:11434" -) -> str | None: +) -> Optional[str]: """Validate model name and provide suggestions if invalid""" if llm_type == "ollama": available_models = check_ollama_models(host) @@ -685,7 +685,7 @@ class HFChat(LLMInterface): class OpenAIChat(LLMInterface): """LLM interface for OpenAI models.""" - def __init__(self, model: str = "gpt-4o", api_key: str | None = None): + def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None): self.model = model self.api_key = api_key or os.getenv("OPENAI_API_KEY") @@ -761,7 +761,7 @@ class SimulatedChat(LLMInterface): return "This is a simulated answer from the LLM based on the retrieved context." -def get_llm(llm_config: dict[str, Any] | None = None) -> LLMInterface: +def get_llm(llm_config: Optional[dict[str, Any]] = None) -> LLMInterface: """ Factory function to get an LLM interface based on configuration. diff --git a/packages/leann-core/src/leann/cli.py b/packages/leann-core/src/leann/cli.py index c8f4c4d..31dca55 100644 --- a/packages/leann-core/src/leann/cli.py +++ b/packages/leann-core/src/leann/cli.py @@ -1,6 +1,7 @@ import argparse import asyncio from pathlib import Path +from typing import Union from llama_index.core import SimpleDirectoryReader from llama_index.core.node_parser import SentenceSplitter @@ -310,7 +311,7 @@ Examples: print(f' leann search {example_name} "your query"') print(f" leann ask {example_name} --interactive") - def load_documents(self, docs_dir: str, custom_file_types: str | None = None): + def load_documents(self, docs_dir: str, custom_file_types: Union[str, None] = None): print(f"Loading documents from {docs_dir}...") if custom_file_types: print(f"Using custom file types: {custom_file_types}") diff --git a/packages/leann-core/src/leann/embedding_server_manager.py b/packages/leann-core/src/leann/embedding_server_manager.py index 2e1c12b..74ecd69 100644 --- a/packages/leann-core/src/leann/embedding_server_manager.py +++ b/packages/leann-core/src/leann/embedding_server_manager.py @@ -6,6 +6,7 @@ import subprocess import sys import time from pathlib import Path +from typing import Optional import psutil @@ -182,8 +183,8 @@ class EmbeddingServerManager: e.g., "leann_backend_diskann.embedding_server" """ self.backend_module_name = backend_module_name - self.server_process: subprocess.Popen | None = None - self.server_port: int | None = None + self.server_process: Optional[subprocess.Popen] = None + self.server_port: Optional[int] = None self._atexit_registered = False def start_server( diff --git a/packages/leann-core/src/leann/interface.py b/packages/leann-core/src/leann/interface.py index d63078b..b98c28d 100644 --- a/packages/leann-core/src/leann/interface.py +++ b/packages/leann-core/src/leann/interface.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Any, Literal +from typing import Any, Literal, Union import numpy as np @@ -34,7 +34,9 @@ class LeannBackendSearcherInterface(ABC): pass @abstractmethod - def _ensure_server_running(self, passages_source_file: str, port: int | None, **kwargs) -> int: + def _ensure_server_running( + self, passages_source_file: str, port: Union[int, None], **kwargs + ) -> int: """Ensure server is running""" pass @@ -48,7 +50,7 @@ class LeannBackendSearcherInterface(ABC): prune_ratio: float = 0.0, recompute_embeddings: bool = False, pruning_strategy: Literal["global", "local", "proportional"] = "global", - zmq_port: int | None = None, + zmq_port: Union[int, None] = None, **kwargs, ) -> dict[str, Any]: """Search for nearest neighbors @@ -74,7 +76,7 @@ class LeannBackendSearcherInterface(ABC): self, query: str, use_server_if_available: bool = True, - zmq_port: int | None = None, + zmq_port: Union[int, None] = None, ) -> np.ndarray: """Compute embedding for a query string diff --git a/packages/leann-core/src/leann/searcher_base.py b/packages/leann-core/src/leann/searcher_base.py index 02ec430..ff368c8 100644 --- a/packages/leann-core/src/leann/searcher_base.py +++ b/packages/leann-core/src/leann/searcher_base.py @@ -1,7 +1,7 @@ import json from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Literal +from typing import Any, Literal, Optional import numpy as np @@ -169,7 +169,7 @@ class BaseSearcher(LeannBackendSearcherInterface, ABC): prune_ratio: float = 0.0, recompute_embeddings: bool = False, pruning_strategy: Literal["global", "local", "proportional"] = "global", - zmq_port: int | None = None, + zmq_port: Optional[int] = None, **kwargs, ) -> dict[str, Any]: """ diff --git a/pyproject.toml b/pyproject.toml index 5d424a2..7992b42 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,8 +40,8 @@ dependencies = [ # Other dependencies "ipykernel==6.29.5", "msgpack>=1.1.1", - "mlx>=0.26.3; sys_platform == 'darwin'", - "mlx-lm>=0.26.0; sys_platform == 'darwin'", + "mlx>=0.26.3; sys_platform == 'darwin' and platform_machine == 'arm64'", + "mlx-lm>=0.26.0; sys_platform == 'darwin' and platform_machine == 'arm64'", "psutil>=5.8.0", "pathspec>=0.12.1", "nbconvert>=7.16.6", @@ -91,7 +91,7 @@ leann-backend-diskann = { path = "packages/leann-backend-diskann", editable = tr leann-backend-hnsw = { path = "packages/leann-backend-hnsw", editable = true } [tool.ruff] -target-version = "py310" +target-version = "py39" line-length = 100 extend-exclude = [ "third_party",