Compare commits
24 Commits
cli_fix
...
fix-arm64-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
2159a29073 | ||
|
|
185bd38112 | ||
|
|
936fa525de | ||
|
|
f6c83898b8 | ||
|
|
40cb39ed8a | ||
|
|
b74718332e | ||
|
|
2c0e4ec58d | ||
|
|
9836ce049d | ||
|
|
478b10c7c1 | ||
|
|
1fce9ad445 | ||
|
|
d452b1ffa3 | ||
|
|
e0085da8ba | ||
|
|
377c952134 | ||
|
|
0ff18a7d79 | ||
|
|
08f9757c45 | ||
|
|
c5c8a57441 | ||
|
|
a5ef3e66d0 | ||
|
|
5079a8b799 | ||
|
|
07f8129f65 | ||
|
|
45ef563bda | ||
|
|
e9d447ac2a | ||
|
|
141e498329 | ||
|
|
4ca0489cb1 | ||
|
|
e83a671918 |
58
.github/workflows/build-reusable.yml
vendored
58
.github/workflows/build-reusable.yml
vendored
@@ -54,6 +54,17 @@ jobs:
|
||||
python: '3.12'
|
||||
- os: ubuntu-22.04
|
||||
python: '3.13'
|
||||
# ARM64 Linux builds
|
||||
- os: ubuntu-24.04-arm
|
||||
python: '3.9'
|
||||
- os: ubuntu-24.04-arm
|
||||
python: '3.10'
|
||||
- os: ubuntu-24.04-arm
|
||||
python: '3.11'
|
||||
- os: ubuntu-24.04-arm
|
||||
python: '3.12'
|
||||
- os: ubuntu-24.04-arm
|
||||
python: '3.13'
|
||||
- os: macos-14
|
||||
python: '3.9'
|
||||
- os: macos-14
|
||||
@@ -108,13 +119,46 @@ jobs:
|
||||
pkg-config libabsl-dev libaio-dev libprotobuf-dev \
|
||||
patchelf
|
||||
|
||||
# Install Intel MKL for DiskANN
|
||||
wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
|
||||
sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV
|
||||
echo "LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/latest/linux/compiler/lib/intel64_lin" >> $GITHUB_ENV
|
||||
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/mkl/latest/lib/intel64" >> $GITHUB_ENV
|
||||
# Debug: Show system information
|
||||
echo "🔍 System Information:"
|
||||
echo "Architecture: $(uname -m)"
|
||||
echo "OS: $(uname -a)"
|
||||
echo "CPU info: $(lscpu | head -5)"
|
||||
|
||||
# Install math library based on architecture
|
||||
ARCH=$(uname -m)
|
||||
echo "🔍 Setting up math library for architecture: $ARCH"
|
||||
|
||||
if [[ "$ARCH" == "x86_64" ]]; then
|
||||
# Install Intel MKL for DiskANN on x86_64
|
||||
echo "📦 Installing Intel MKL for x86_64..."
|
||||
wget -q https://registrationcenter-download.intel.com/akdlm/IRC_NAS/79153e0f-74d7-45af-b8c2-258941adf58a/intel-onemkl-2025.0.0.940.sh
|
||||
sudo sh intel-onemkl-2025.0.0.940.sh -a --components intel.oneapi.lin.mkl.devel --action install --eula accept -s
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
echo "MKLROOT=/opt/intel/oneapi/mkl/latest" >> $GITHUB_ENV
|
||||
echo "LD_LIBRARY_PATH=/opt/intel/oneapi/compiler/latest/linux/compiler/lib/intel64_lin" >> $GITHUB_ENV
|
||||
echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/intel/oneapi/mkl/latest/lib/intel64" >> $GITHUB_ENV
|
||||
echo "✅ Intel MKL installed for x86_64"
|
||||
|
||||
# Debug: Check MKL installation
|
||||
echo "🔍 MKL Installation Check:"
|
||||
ls -la /opt/intel/oneapi/mkl/latest/ || echo "MKL directory not found"
|
||||
ls -la /opt/intel/oneapi/mkl/latest/lib/ || echo "MKL lib directory not found"
|
||||
|
||||
elif [[ "$ARCH" == "aarch64" ]]; then
|
||||
# Use OpenBLAS for ARM64 (MKL installer not compatible with ARM64)
|
||||
echo "📦 Installing OpenBLAS for ARM64..."
|
||||
sudo apt-get install -y libopenblas-dev liblapack-dev liblapacke-dev
|
||||
echo "✅ OpenBLAS installed for ARM64"
|
||||
|
||||
# Debug: Check OpenBLAS installation
|
||||
echo "🔍 OpenBLAS Installation Check:"
|
||||
dpkg -l | grep openblas || echo "OpenBLAS package not found"
|
||||
ls -la /usr/lib/aarch64-linux-gnu/openblas/ || echo "OpenBLAS directory not found"
|
||||
fi
|
||||
|
||||
# Debug: Show final library paths
|
||||
echo "🔍 Final LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
|
||||
|
||||
- name: Install system dependencies (macOS)
|
||||
if: runner.os == 'macOS'
|
||||
|
||||
Submodule packages/leann-backend-diskann/third_party/DiskANN updated: c593831474...19f9603c72
@@ -49,9 +49,28 @@ set(BUILD_TESTING OFF CACHE BOOL "" FORCE)
|
||||
set(FAISS_ENABLE_C_API OFF CACHE BOOL "" FORCE)
|
||||
set(FAISS_OPT_LEVEL "generic" CACHE STRING "" FORCE)
|
||||
|
||||
# Disable additional SIMD versions to speed up compilation
|
||||
# Disable x86-specific SIMD optimizations (important for ARM64 compatibility)
|
||||
set(FAISS_ENABLE_AVX2 OFF CACHE BOOL "" FORCE)
|
||||
set(FAISS_ENABLE_AVX512 OFF CACHE BOOL "" FORCE)
|
||||
set(FAISS_ENABLE_SSE4_1 OFF CACHE BOOL "" FORCE)
|
||||
|
||||
# ARM64-specific configuration
|
||||
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
|
||||
message(STATUS "Configuring Faiss for ARM64 architecture")
|
||||
|
||||
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||
# Use SVE optimization level for ARM64 Linux (as seen in Faiss conda build)
|
||||
set(FAISS_OPT_LEVEL "sve" CACHE STRING "" FORCE)
|
||||
message(STATUS "Setting FAISS_OPT_LEVEL to 'sve' for ARM64 Linux")
|
||||
else()
|
||||
# Use generic optimization for other ARM64 platforms (like macOS)
|
||||
set(FAISS_OPT_LEVEL "generic" CACHE STRING "" FORCE)
|
||||
message(STATUS "Setting FAISS_OPT_LEVEL to 'generic' for ARM64 ${CMAKE_SYSTEM_NAME}")
|
||||
endif()
|
||||
|
||||
# ARM64 compatibility: Faiss submodule has been modified to fix x86 header inclusion
|
||||
message(STATUS "Using ARM64-compatible Faiss submodule")
|
||||
endif()
|
||||
|
||||
# Additional optimization options from INSTALL.md
|
||||
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "" FORCE)
|
||||
|
||||
Submodule packages/leann-backend-hnsw/third_party/faiss updated: a0361858fc...ed96ff7dba
@@ -322,17 +322,9 @@ Examples:
|
||||
|
||||
return basic_matches
|
||||
|
||||
def _should_exclude_file(self, file_path: Path, gitignore_matches) -> bool:
|
||||
"""Check if a file should be excluded using gitignore parser.
|
||||
|
||||
Always match against absolute, posix-style paths for consistency with
|
||||
gitignore_parser expectations.
|
||||
"""
|
||||
try:
|
||||
absolute_path = file_path.resolve()
|
||||
except Exception:
|
||||
absolute_path = Path(str(file_path))
|
||||
return gitignore_matches(absolute_path.as_posix())
|
||||
def _should_exclude_file(self, relative_path: Path, gitignore_matches) -> bool:
|
||||
"""Check if a file should be excluded using gitignore parser."""
|
||||
return gitignore_matches(str(relative_path))
|
||||
|
||||
def _is_git_submodule(self, path: Path) -> bool:
|
||||
"""Check if a path is a git submodule."""
|
||||
@@ -404,9 +396,7 @@ Examples:
|
||||
print(f" {current_path}")
|
||||
print(" " + "─" * 45)
|
||||
|
||||
current_indexes = self._discover_indexes_in_project(
|
||||
current_path, exclude_dirs=other_projects
|
||||
)
|
||||
current_indexes = self._discover_indexes_in_project(current_path)
|
||||
if current_indexes:
|
||||
for idx in current_indexes:
|
||||
total_indexes += 1
|
||||
@@ -445,14 +435,9 @@ Examples:
|
||||
print(" leann build my-docs --docs ./documents")
|
||||
else:
|
||||
# Count only projects that have at least one discoverable index
|
||||
projects_count = 0
|
||||
for p in valid_projects:
|
||||
if p == current_path:
|
||||
discovered = self._discover_indexes_in_project(p, exclude_dirs=other_projects)
|
||||
else:
|
||||
discovered = self._discover_indexes_in_project(p)
|
||||
if len(discovered) > 0:
|
||||
projects_count += 1
|
||||
projects_count = sum(
|
||||
1 for p in valid_projects if len(self._discover_indexes_in_project(p)) > 0
|
||||
)
|
||||
print(f"📊 Total: {total_indexes} indexes across {projects_count} projects")
|
||||
|
||||
if current_indexes_count > 0:
|
||||
@@ -469,22 +454,9 @@ Examples:
|
||||
print("\n💡 Create your first index:")
|
||||
print(" leann build my-docs --docs ./documents")
|
||||
|
||||
def _discover_indexes_in_project(
|
||||
self, project_path: Path, exclude_dirs: Optional[list[Path]] = None
|
||||
):
|
||||
"""Discover all indexes in a project directory (both CLI and apps formats)
|
||||
|
||||
exclude_dirs: when provided, skip any APP-format index files that are
|
||||
located under these directories. This prevents duplicates when the
|
||||
current project is a parent directory of other registered projects.
|
||||
"""
|
||||
def _discover_indexes_in_project(self, project_path: Path):
|
||||
"""Discover all indexes in a project directory (both CLI and apps formats)"""
|
||||
indexes = []
|
||||
exclude_dirs = exclude_dirs or []
|
||||
# normalize to resolved paths once for comparison
|
||||
try:
|
||||
exclude_dirs_resolved = [p.resolve() for p in exclude_dirs]
|
||||
except Exception:
|
||||
exclude_dirs_resolved = exclude_dirs
|
||||
|
||||
# 1. CLI format: .leann/indexes/index_name/
|
||||
cli_indexes_dir = project_path / ".leann" / "indexes"
|
||||
@@ -523,17 +495,6 @@ Examples:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
# Skip meta files that live under excluded directories
|
||||
try:
|
||||
meta_parent_resolved = meta_file.parent.resolve()
|
||||
if any(
|
||||
meta_parent_resolved.is_relative_to(ex_dir)
|
||||
for ex_dir in exclude_dirs_resolved
|
||||
):
|
||||
continue
|
||||
except Exception:
|
||||
# best effort; if resolve or comparison fails, do not exclude
|
||||
pass
|
||||
# Use the parent directory name as the app index display name
|
||||
display_name = meta_file.parent.name
|
||||
# Extract file base used to store files
|
||||
@@ -1061,8 +1022,7 @@ Examples:
|
||||
|
||||
# Try to use better PDF parsers first, but only if PDFs are requested
|
||||
documents = []
|
||||
# Use resolved absolute paths to avoid mismatches (symlinks, relative vs absolute)
|
||||
docs_path = Path(docs_dir).resolve()
|
||||
docs_path = Path(docs_dir)
|
||||
|
||||
# Check if we should process PDFs
|
||||
should_process_pdfs = custom_file_types is None or ".pdf" in custom_file_types
|
||||
@@ -1071,15 +1031,10 @@ Examples:
|
||||
for file_path in docs_path.rglob("*.pdf"):
|
||||
# Check if file matches any exclude pattern
|
||||
try:
|
||||
# Ensure both paths are resolved before computing relativity
|
||||
file_path_resolved = file_path.resolve()
|
||||
# Determine directory scope using the non-resolved path to avoid
|
||||
# misclassifying symlinked entries as outside the docs directory
|
||||
relative_path = file_path.relative_to(docs_path)
|
||||
if not include_hidden and _path_has_hidden_segment(relative_path):
|
||||
continue
|
||||
# Use absolute path for gitignore matching
|
||||
if self._should_exclude_file(file_path_resolved, gitignore_matches):
|
||||
if self._should_exclude_file(relative_path, gitignore_matches):
|
||||
continue
|
||||
except ValueError:
|
||||
# Skip files that can't be made relative to docs_path
|
||||
@@ -1122,11 +1077,10 @@ Examples:
|
||||
) -> bool:
|
||||
"""Return True if file should be included (not excluded)"""
|
||||
try:
|
||||
docs_path_obj = Path(docs_dir).resolve()
|
||||
file_path_obj = Path(file_path).resolve()
|
||||
# Use absolute path for gitignore matching
|
||||
_ = file_path_obj.relative_to(docs_path_obj) # validate scope
|
||||
return not self._should_exclude_file(file_path_obj, gitignore_matches)
|
||||
docs_path_obj = Path(docs_dir)
|
||||
file_path_obj = Path(file_path)
|
||||
relative_path = file_path_obj.relative_to(docs_path_obj)
|
||||
return not self._should_exclude_file(relative_path, gitignore_matches)
|
||||
except (ValueError, OSError):
|
||||
return True # Include files that can't be processed
|
||||
|
||||
|
||||
Reference in New Issue
Block a user