From 0aa84e147b4b071a978c4fe1838579a1e2d95ec6 Mon Sep 17 00:00:00 2001 From: Andy Lee Date: Sat, 5 Jul 2025 23:04:41 +0000 Subject: [PATCH] feat: hnsw embedding server and csr format --- examples/main_cli_example.py | 53 +- .../leann_backend_diskann/__init__.py | 7 - .../leann_backend_hnsw/convert_to_csr.py | 543 ++++++++++++++++++ .../leann_backend_hnsw/hnsw_backend.py | 324 +++++++++-- .../hnsw_embedding_server.py | 34 +- packages/leann-core/src/leann/api.py | 58 +- packages/leann-core/src/leann/registry.py | 9 +- pyproject.toml | 3 +- uv.lock | 82 ++- 9 files changed, 959 insertions(+), 154 deletions(-) create mode 100644 packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py diff --git a/examples/main_cli_example.py b/examples/main_cli_example.py index 0ac6ade..1cfb4d2 100644 --- a/examples/main_cli_example.py +++ b/examples/main_cli_example.py @@ -1,3 +1,6 @@ +import faulthandler +faulthandler.enable() + from llama_index.core import SimpleDirectoryReader, Settings from llama_index.core.readers.base import BaseReader from llama_index.node_parser.docling import DoclingNodeParser @@ -7,7 +10,7 @@ import asyncio import os import dotenv from leann.api import LeannBuilder, LeannSearcher, LeannChat -import leann_backend_diskann # Import to ensure backend registration +import leann_backend_hnsw # Import to ensure backend registration import shutil from pathlib import Path @@ -21,7 +24,7 @@ file_extractor: dict[str, BaseReader] = { ".xlsx": reader, } node_parser = DoclingNodeParser( - chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=256) + chunker=HybridChunker(tokenizer="Qwen/Qwen3-Embedding-4B", max_tokens=64) ) print("Loading documents...") documents = SimpleDirectoryReader( @@ -32,10 +35,8 @@ documents = SimpleDirectoryReader( required_exts=[".pdf", ".docx", ".pptx", ".xlsx"] ).load_data(show_progress=True) print("Documents loaded.") -# Extract text from documents and prepare for Leann all_texts = [] for doc in documents: - # DoclingNodeParser returns Node objects, which have a text attribute nodes = node_parser.get_nodes_from_documents([doc]) for node in nodes: all_texts.append(node.text) @@ -43,34 +44,38 @@ for doc in documents: INDEX_DIR = Path("./test_pdf_index") INDEX_PATH = str(INDEX_DIR / "pdf_documents.leann") -if INDEX_DIR.exists(): - print(f"--- Cleaning up old index directory: {INDEX_DIR} ---") - shutil.rmtree(INDEX_DIR) - -print(f"\n[PHASE 1] Building Leann index...") - -builder = LeannBuilder( - backend_name="diskann", - embedding_model="facebook/contriever", # Using a common sentence transformer model - graph_degree=32, - complexity=64 -) - -print(f"Loaded {len(all_texts)} text chunks from documents.") -for chunk_text in all_texts: - builder.add_text(chunk_text) +if not INDEX_DIR.exists(): + print(f"--- Index directory not found, building new index ---") -builder.build_index(INDEX_PATH) -print(f"\nLeann index built at {INDEX_PATH}!") + print(f"\n[PHASE 1] Building Leann index...") + + # CSR compact mode with recompute + builder = LeannBuilder( + backend_name="hnsw", + embedding_model="facebook/contriever", + graph_degree=32, + complexity=64, + is_compact=True, + is_recompute=True + ) + + print(f"Loaded {len(all_texts)} text chunks from documents.") + for chunk_text in all_texts: + builder.add_text(chunk_text) + + builder.build_index(INDEX_PATH) + print(f"\nLeann index built at {INDEX_PATH}!") +else: + print(f"--- Using existing index at {INDEX_DIR} ---") async def main(): print(f"\n[PHASE 2] Starting Leann chat session...") chat = 
LeannChat(index_path=INDEX_PATH) query = "Based on the paper, what are the main techniques LEANN explores to reduce the storage overhead and DLPM explore to achieve Fairness and Efiiciency trade-off?" - # query = "What is the Off-policy training in RL?" print(f"You: {query}") - chat_response = chat.ask(query, top_k=20, recompute_beighbor_embeddings=True) + + chat_response = chat.ask(query, top_k=20) print(f"Leann: {chat_response}") if __name__ == "__main__": diff --git a/packages/leann-backend-diskann/leann_backend_diskann/__init__.py b/packages/leann-backend-diskann/leann_backend_diskann/__init__.py index 8074aa4..e69de29 100644 --- a/packages/leann-backend-diskann/leann_backend_diskann/__init__.py +++ b/packages/leann-backend-diskann/leann_backend_diskann/__init__.py @@ -1,7 +0,0 @@ -print("Initializing leann-backend-diskann...") - -try: - from .diskann_backend import DiskannBackend - print("INFO: DiskANN backend loaded successfully") -except ImportError as e: - print(f"WARNING: Could not import DiskANN backend: {e}") \ No newline at end of file diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py b/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py new file mode 100644 index 0000000..b2dc53a --- /dev/null +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/convert_to_csr.py @@ -0,0 +1,543 @@ +import struct +import sys +import numpy as np +import os +import argparse +import gc # Import garbage collector interface +import time +# --- FourCCs (add more if needed) --- +INDEX_HNSW_FLAT_FOURCC = int.from_bytes(b'IHNf', 'little') +# Add other HNSW fourccs if you expect different storage types inside HNSW +# INDEX_HNSW_PQ_FOURCC = int.from_bytes(b'IHNp', 'little') +# INDEX_HNSW_SQ_FOURCC = int.from_bytes(b'IHNs', 'little') +# INDEX_HNSW_CAGRA_FOURCC = int.from_bytes(b'IHNc', 'little') # Example + +EXPECTED_HNSW_FOURCCS = {INDEX_HNSW_FLAT_FOURCC} # Modify if needed +NULL_INDEX_FOURCC = int.from_bytes(b'null', 'little') + +# --- Helper functions for reading/writing binary data --- + +def read_struct(f, fmt): + """Reads data according to the struct format.""" + size = struct.calcsize(fmt) + data = f.read(size) + if len(data) != size: + raise EOFError(f"File ended unexpectedly reading struct fmt '{fmt}'. Expected {size} bytes, got {len(data)}.") + return struct.unpack(fmt, data)[0] + +def read_vector_raw(f, element_fmt_char): + """Reads a vector (size followed by data), returns count and raw bytes.""" + count = -1 # Initialize count + total_bytes = -1 # Initialize total_bytes + try: + count = read_struct(f, ' max_reasonable_count or count < 0: + raise MemoryError(f"Vector count {count} seems unreasonably large, possibly due to file corruption or incorrect format read.") + + total_bytes = count * element_size + # --- FIX for MemoryError: Check for huge byte size before allocation --- + max_reasonable_bytes = 50 * (1024**3) # ~50 GB limit + if total_bytes > max_reasonable_bytes or total_bytes < 0: # Check for overflow + raise MemoryError(f"Attempting to read {total_bytes} bytes ({count} elements * {element_size} bytes/element), which exceeds the safety limit. File might be corrupted or format mismatch.") + + data_bytes = f.read(total_bytes) + + if len(data_bytes) != total_bytes: + raise EOFError(f"File ended unexpectedly reading vector data. 
Expected {total_bytes} bytes, got {len(data_bytes)}.") + return count, data_bytes + except (MemoryError, OverflowError) as e: + # Add context to the error message + print(f"\nError during raw vector read (element_fmt='{element_fmt_char}', count={count}, total_bytes={total_bytes}): {e}", file=sys.stderr) + raise e # Re-raise the original error type + +def read_numpy_vector(f, np_dtype, struct_fmt_char): + """Reads a vector into a NumPy array.""" + count = -1 # Initialize count for robust error handling + print(f" Reading vector (dtype={np_dtype}, fmt='{struct_fmt_char}')... ", end='', flush=True) + try: + count, data_bytes = read_vector_raw(f, struct_fmt_char) + print(f"Count={count}, Bytes={len(data_bytes)}") + if count > 0 and len(data_bytes) > 0: + arr = np.frombuffer(data_bytes, dtype=np_dtype) + if arr.size != count: + raise ValueError(f"Inconsistent array size after reading. Expected {count}, got {arr.size}") + return arr + elif count == 0: + return np.array([], dtype=np_dtype) + else: + raise ValueError("Read zero bytes but count > 0.") + except MemoryError as e: + # Now count should be defined (or -1 if error was in read_struct) + print(f"\nMemoryError creating NumPy array (dtype={np_dtype}, count={count}). {e}", file=sys.stderr) + raise e + except Exception as e: # Catch other potential errors like ValueError + print(f"\nError reading numpy vector (dtype={np_dtype}, fmt='{struct_fmt_char}', count={count}): {e}", file=sys.stderr) + raise e + + +def write_numpy_vector(f, arr, struct_fmt_char): + """Writes a NumPy array as a vector (size followed by data).""" + count = arr.size + f.write(struct.pack(' 0 else 0 + +def write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np, + levels_np, compact_level_ptr, compact_node_offsets_np, + compact_neighbors_data, storage_fourcc, storage_data): + """Write HNSW data in compact format following C++ read order exactly.""" + # Write IndexHNSW Header + f_out.write(struct.pack(' 1: + f_out.write(struct.pack(' {output_filename}") + start_time = time.time() + original_hnsw_data = {} + neighbors_np = None # Initialize to allow check in finally block + try: + with open(input_filename, 'rb') as f_in, open(output_filename, 'wb') as f_out: + + # --- Read IndexHNSW FourCC and Header --- + print(f"[{time.time() - start_time:.2f}s] Reading Index HNSW header...") + # ... (Keep the header reading logic as before) ... + hnsw_index_fourcc = read_struct(f_in, ' 1: + original_hnsw_data['metric_arg'] = read_struct(f_in, ' 0 else 0 + if neighbors_np.size != expected_neighbors_size: + print(f"Warning: neighbors vector size mismatch. 
Expected {expected_neighbors_size} based on offsets, got {neighbors_np.size}.") + gc.collect() + + original_hnsw_data['entry_point'] = read_struct(f_in, ' 0 and i % (ntotal // 100 or 1) == 0: # Log progress roughly every 1% + progress = (i / ntotal) * 100 + elapsed = time.time() - start_time + print(f"\r[{elapsed:.2f}s] Converting node {i}/{ntotal} ({progress:.1f}%)...", end="") + + node_max_level = levels_np[i] - 1 + if node_max_level < -1: node_max_level = -1 + + node_ptr_start_index = current_level_ptr_idx + compact_node_offsets_np[i] = node_ptr_start_index + + original_offset_start = offsets_np[i] + num_pointers_expected = (node_max_level + 1) + 1 + + for level in range(node_max_level + 1): + compact_level_ptr.append(current_data_idx) + + begin_orig_np = original_offset_start + get_cum_neighbors(cum_nneighbor_per_level_np, level) + end_orig_np = original_offset_start + get_cum_neighbors(cum_nneighbor_per_level_np, level + 1) + + begin_orig = int(begin_orig_np) + end_orig = int(end_orig_np) + + neighbors_len = len(neighbors_np) # Cache length + begin_orig = min(max(0, begin_orig), neighbors_len) + end_orig = min(max(begin_orig, end_orig), neighbors_len) + + if begin_orig < end_orig: + # Slicing creates a copy, could be memory intensive for large M + # Consider iterating if memory becomes an issue here + level_neighbors_slice = neighbors_np[begin_orig:end_orig] + valid_neighbors_mask = level_neighbors_slice >= 0 + num_valid = np.count_nonzero(valid_neighbors_mask) + + if num_valid > 0: + # Append valid neighbors + compact_neighbors_data.extend(level_neighbors_slice[valid_neighbors_mask]) + current_data_idx += num_valid + total_valid_neighbors_counted += num_valid + + + compact_level_ptr.append(current_data_idx) + current_level_ptr_idx += num_pointers_expected + + compact_node_offsets_np[ntotal] = current_level_ptr_idx + print(f"\r[{time.time() - start_time:.2f}s] Conversion loop finished. 
") # Clear progress line + + # --- Validation Checks --- + print(f"[{time.time() - start_time:.2f}s] Running validation checks...") + valid_check_passed = True + # Check 1: Total valid neighbors count + print(f" Checking total valid neighbor count...") + expected_valid_count = np.sum(neighbors_np >= 0) + if total_valid_neighbors_counted != len(compact_neighbors_data): + print(f"Error: Mismatch between counted valid neighbors ({total_valid_neighbors_counted}) and final compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr) + valid_check_passed = False + if expected_valid_count != len(compact_neighbors_data): + print(f"Error: Mismatch between NumPy count of valid neighbors ({expected_valid_count}) and final compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr) + valid_check_passed = False + else: + print(f" OK: Total valid neighbors = {len(compact_neighbors_data)}") + + # Check 2: Final pointer indices consistency + print(f" Checking final pointer indices...") + if compact_node_offsets_np[ntotal] != len(compact_level_ptr): + print(f"Error: Final node offset ({compact_node_offsets_np[ntotal]}) doesn't match level_ptr size ({len(compact_level_ptr)})!", file=sys.stderr) + valid_check_passed = False + if (len(compact_level_ptr) > 0 and compact_level_ptr[-1] != len(compact_neighbors_data)) or \ + (len(compact_level_ptr) == 0 and len(compact_neighbors_data) != 0): + last_ptr = compact_level_ptr[-1] if len(compact_level_ptr) > 0 else -1 + print(f"Error: Last level pointer ({last_ptr}) doesn't match compact_data size ({len(compact_neighbors_data)})!", file=sys.stderr) + valid_check_passed = False + else: + print(f" OK: Final pointers match data size.") + + if not valid_check_passed: + print("Error: Validation checks failed. Output file might be incorrect.", file=sys.stderr) + # Optional: Exit here if validation fails + # return False + + # --- Explicitly delete large intermediate arrays --- + print(f"[{time.time() - start_time:.2f}s] Deleting original neighbors and offsets arrays...") + del neighbors_np + del offsets_np + gc.collect() + + print(f" CSR Stats: |data|={len(compact_neighbors_data)}, |level_ptr|={len(compact_level_ptr)}") + + # --- Write CSR HNSW graph data using unified function --- + print(f"[{time.time() - start_time:.2f}s] Writing CSR HNSW graph data in FAISS-compatible order...") + + # Determine storage fourcc based on prune_embeddings + output_storage_fourcc = NULL_INDEX_FOURCC if prune_embeddings else (storage_fourcc if 'storage_fourcc' in locals() else NULL_INDEX_FOURCC) + if prune_embeddings: + print(f" Pruning embeddings: Writing NULL storage marker.") + storage_data = b'' + + # Use the unified write function + write_compact_format(f_out, original_hnsw_data, assign_probas_np, cum_nneighbor_per_level_np, + levels_np, compact_level_ptr, compact_node_offsets_np, + compact_neighbors_data, output_storage_fourcc, storage_data if not prune_embeddings else b'') + + # Clean up memory + del assign_probas_np, cum_nneighbor_per_level_np, levels_np + del compact_neighbors_data, compact_level_ptr, compact_node_offsets_np + gc.collect() + + end_time = time.time() + print(f"[{end_time - start_time:.2f}s] Conversion complete.") + return True + + except FileNotFoundError: + print(f"Error: Input file not found: {input_filename}", file=sys.stderr) + return False + except MemoryError as e: + print(f"\nFatal MemoryError during conversion: {e}. Insufficient RAM.", file=sys.stderr) + # Clean up potentially partially written output file? 
+ try: os.remove(output_filename) + except OSError: pass + return False + except EOFError as e: + print(f"Error: Reached end of file unexpectedly reading {input_filename}. {e}", file=sys.stderr) + try: os.remove(output_filename) + except OSError: pass + return False + except Exception as e: + print(f"An unexpected error occurred during conversion: {e}", file=sys.stderr) + import traceback + traceback.print_exc() + try: + os.remove(output_filename) + except OSError: pass + return False + # Ensure neighbors_np is deleted even if an error occurs after its allocation + finally: + if 'neighbors_np' in locals() and neighbors_np is not None: + del neighbors_np + gc.collect() + + +# --- Script Execution --- +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Convert a Faiss IndexHNSWFlat file to a CSR-based HNSW graph file.") + parser.add_argument("input_index_file", help="Path to the input IndexHNSWFlat file") + parser.add_argument("output_csr_graph_file", help="Path to write the output CSR HNSW graph file") + parser.add_argument("--prune-embeddings", action="store_true", default=True, + help="Prune embedding storage (write NULL storage marker)") + parser.add_argument("--keep-embeddings", action="store_true", + help="Keep embedding storage (overrides --prune-embeddings)") + + args = parser.parse_args() + + if not os.path.exists(args.input_index_file): + print(f"Error: Input file not found: {args.input_index_file}", file=sys.stderr) + sys.exit(1) + + if os.path.abspath(args.input_index_file) == os.path.abspath(args.output_csr_graph_file): + print(f"Error: Input and output filenames cannot be the same.", file=sys.stderr) + sys.exit(1) + + prune_embeddings = args.prune_embeddings and not args.keep_embeddings + success = convert_hnsw_graph_to_csr(args.input_index_file, args.output_csr_graph_file, prune_embeddings) + if not success: + sys.exit(1) \ No newline at end of file diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py index 7e74de1..10add4d 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py @@ -3,7 +3,7 @@ import os import json import struct from pathlib import Path -from typing import Dict +from typing import Dict, Any import contextlib import threading import time @@ -12,9 +12,7 @@ import socket import subprocess import sys -# 文件: packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py - -# ... (其他 import 保持不变) ... 
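+# CSR conversion helper added in this patch; HNSWBuilder._convert_to_csr calls it when is_compact=True.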
+from .convert_to_csr import convert_hnsw_graph_to_csr from leann.registry import register_backend from leann.interface import ( @@ -28,7 +26,7 @@ def get_metric_map(): return { "mips": faiss.METRIC_INNER_PRODUCT, "l2": faiss.METRIC_L2, - "cosine": faiss.METRIC_INNER_PRODUCT, # Will need normalization + "cosine": faiss.METRIC_INNER_PRODUCT, } def _check_port(port: int) -> bool: @@ -69,12 +67,11 @@ class HNSWEmbeddingServerManager: try: command = [ sys.executable, - "-m", "packages.leann-backend-hnsw.src.leann_backend_hnsw.hnsw_embedding_server", + "-m", "leann_backend_hnsw.hnsw_embedding_server", "--zmq-port", str(port), "--model-name", model_name ] - # Add passages file if provided if passages_file: command.extend(["--passages-file", str(passages_file)]) @@ -172,7 +169,29 @@ class HNSWBackend(LeannBackendFactoryInterface): class HNSWBuilder(LeannBackendBuilderInterface): def __init__(self, **kwargs): - self.build_params = kwargs + self.build_params = kwargs.copy() + + # --- Configuration defaults with standardized names --- + # Apply defaults and write them back to the build_params dict + # so they can be saved in the metadata file by LeannBuilder. + self.is_compact = self.build_params.setdefault("is_compact", True) + self.is_recompute = self.build_params.setdefault("is_recompute", True) # Default: prune embeddings + + # --- Additional Options --- + self.is_skip_neighbors = self.build_params.setdefault("is_skip_neighbors", False) + self.disk_cache_ratio = self.build_params.setdefault("disk_cache_ratio", 0.0) + self.external_storage_path = self.build_params.get("external_storage_path", None) + + # --- Standard HNSW parameters --- + self.M = self.build_params.setdefault("M", 32) + self.efConstruction = self.build_params.setdefault("efConstruction", 200) + self.distance_metric = self.build_params.setdefault("distance_metric", "mips") + + if self.is_skip_neighbors and not self.is_compact: + raise ValueError("is_skip_neighbors can only be used with is_compact=True") + + if self.is_recompute and not self.is_compact: + raise ValueError("is_recompute requires is_compact=True for efficiency") def build(self, data: np.ndarray, index_path: str, **kwargs): """Build HNSW index using FAISS""" @@ -189,97 +208,297 @@ class HNSWBuilder(LeannBackendBuilderInterface): if not data.flags['C_CONTIGUOUS']: data = np.ascontiguousarray(data) - build_kwargs = {**self.build_params, **kwargs} - metric_str = build_kwargs.get("distance_metric", "mips").lower() + metric_str = self.distance_metric.lower() metric_enum = get_metric_map().get(metric_str) print('metric_enum', metric_enum,' metric_str', metric_str) if metric_enum is None: raise ValueError(f"Unsupported distance_metric '{metric_str}'.") - # HNSW parameters - M = build_kwargs.get("M", 32) # Max connections per layer - efConstruction = build_kwargs.get("efConstruction", 200) # Size of the dynamic candidate list for construction + M = self.M + efConstruction = self.efConstruction dim = data.shape[1] print(f"INFO: Building HNSW index for {data.shape[0]} vectors with metric {metric_enum}...") try: - # Create HNSW index if metric_enum == faiss.METRIC_INNER_PRODUCT: index = faiss.IndexHNSWFlat(dim, M, metric_enum) else: # L2 index = faiss.IndexHNSWFlat(dim, M, metric_enum) - # Set construction parameters index.hnsw.efConstruction = efConstruction - # Normalize vectors if using cosine similarity if metric_str == "cosine": faiss.normalize_L2(data) - # Add vectors to index print('starting to add vectors to index') index.add(data.shape[0], faiss.swig_ptr(data)) 
print('vectors added to index') - # Save index index_file = index_dir / f"{index_prefix}.index" faiss.write_index(index, str(index_file)) print(f"✅ HNSW index built successfully at '{index_file}'") + + if self.is_compact: + self._convert_to_csr(index_file) + + # Generate passages file for recompute mode + if self.is_recompute: + self._generate_passages_file(index_dir, index_prefix, **kwargs) except Exception as e: print(f"💥 ERROR: HNSW index build failed. Exception: {e}") raise + def _convert_to_csr(self, index_file: Path): + """Convert built index to CSR format""" + try: + mode_str = "CSR-pruned" if self.is_recompute else "CSR-standard" + print(f"INFO: Converting HNSW index to {mode_str} format...") + + csr_temp_file = index_file.with_suffix(".csr.tmp") + + success = convert_hnsw_graph_to_csr( + str(index_file), + str(csr_temp_file), + prune_embeddings=self.is_recompute + ) + + if success: + print("✅ CSR conversion successful.") + import shutil + shutil.move(str(csr_temp_file), str(index_file)) + print(f"INFO: Replaced original index with {mode_str} version at '{index_file}'") + else: + # Clean up and fail fast + if csr_temp_file.exists(): + os.remove(csr_temp_file) + raise RuntimeError("CSR conversion failed - cannot proceed with compact format") + + except Exception as e: + print(f"💥 ERROR: CSR conversion failed. Exception: {e}") + raise + + def _generate_passages_file(self, index_dir: Path, index_prefix: str, **kwargs): + """Generate passages file for recompute mode""" + try: + chunks = kwargs.get('chunks', []) + if not chunks: + print("INFO: No chunks data provided, skipping passages file generation") + return + + # Generate node_id to text mapping + passages_data = {} + for node_id, chunk in enumerate(chunks): + passages_data[str(node_id)] = chunk["text"] + + # Save passages file + passages_file = index_dir / f"{index_prefix}.passages.json" + with open(passages_file, 'w', encoding='utf-8') as f: + json.dump(passages_data, f, ensure_ascii=False, indent=2) + + print(f"✅ Generated passages file for recompute mode at '{passages_file}' ({len(passages_data)} passages)") + + except Exception as e: + print(f"💥 ERROR: Failed to generate passages file. Exception: {e}") + # Don't raise - this is not critical for index building + pass + class HNSWSearcher(LeannBackendSearcherInterface): + def _get_index_storage_status(self, index_file: Path) -> tuple[bool, bool]: + """ + Robustly determines the index's storage status by parsing the file. + + Returns: + A tuple (is_compact, is_pruned). 
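+            is_compact: the graph is stored in the CSR layout written by convert_to_csr.py.
+            is_pruned: the embedding storage was replaced by the NULL marker, so searches must recompute embeddings.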
+ """ + if not index_file.exists(): + return False, False + + with open(index_file, 'rb') as f: + try: + def read_struct(fmt): + size = struct.calcsize(fmt) + data = f.read(size) + if len(data) != size: + raise EOFError(f"File ended unexpectedly reading struct fmt '{fmt}'.") + return struct.unpack(fmt, data)[0] + + def skip_vector(element_size): + count = read_struct(' 1: read_struct(' Dict[str, any]: + self.is_compact, self.is_pruned = self._get_index_storage_status(index_file) + + # Validate configuration constraints + if not self.is_compact and self.config.get("is_skip_neighbors", False): + raise ValueError("is_skip_neighbors can only be used with is_compact=True") + + if self.config.get("is_recompute", False) and self.config.get("external_storage_path"): + raise ValueError("Cannot use both is_recompute and external_storage_path simultaneously") + + hnsw_config = faiss.HNSWIndexConfig() + hnsw_config.is_compact = self.is_compact + + # Apply additional configuration options with strict validation + hnsw_config.is_skip_neighbors = self.config.get("is_skip_neighbors", False) + # If index is pruned, force recompute mode regardless of user setting + hnsw_config.is_recompute = self.is_pruned or self.config.get("is_recompute", False) + hnsw_config.disk_cache_ratio = self.config.get("disk_cache_ratio", 0.0) + hnsw_config.external_storage_path = self.config.get("external_storage_path") + hnsw_config.zmq_port = self.config.get("zmq_port", 5557) + + # CRITICAL ASSERTION: If index is pruned, recompute MUST be enabled + if self.is_pruned and not hnsw_config.is_recompute: + raise RuntimeError("Index is pruned (embeddings removed) but recompute is disabled. This is impossible - recompute must be enabled for pruned indices.") + + print(f"INFO: Loading index with is_compact={self.is_compact}, is_pruned={self.is_pruned}") + print(f"INFO: Config - skip_neighbors={hnsw_config.is_skip_neighbors}, recompute={hnsw_config.is_recompute}") + + self._index = faiss.read_index(str(index_file), faiss.IO_FLAG_MMAP, hnsw_config) + + if self.is_compact: + print("✅ Compact CSR format HNSW index loaded successfully.") + else: + print("✅ Standard HNSW index loaded successfully.") + + self.metric_str = metric_str + self.embedding_server_manager = HNSWEmbeddingServerManager() + + def _get_index_file(self, index_dir: Path, index_prefix: str) -> Path: + """Get the appropriate index file path based on format""" + # We always use the same filename now, format is detected internally + return index_dir / f"{index_prefix}.index" + + def search(self, query: np.ndarray, top_k: int, **kwargs) -> Dict[str, Any]: """Search using HNSW index with optional recompute functionality""" from . 
import faiss - ef = kwargs.get("ef", 200) # Size of the dynamic candidate list for search + # Merge config with search-time kwargs + search_config = self.config.copy() + search_config.update(kwargs) + + ef = search_config.get("ef", 200) # Size of the dynamic candidate list for search # Recompute parameters - recompute_neighbor_embeddings = kwargs.get("recompute_neighbor_embeddings", False) - zmq_port = kwargs.get("zmq_port", 5556) - embedding_model = kwargs.get("embedding_model", "sentence-transformers/all-mpnet-base-v2") - passages_file = kwargs.get("passages_file", None) + zmq_port = search_config.get("zmq_port", 5557) + embedding_model = search_config.get("embedding_model", "sentence-transformers/all-mpnet-base-v2") + passages_file = search_config.get("passages_file", None) - if recompute_neighbor_embeddings: - print(f"INFO: HNSW ZMQ mode enabled - ensuring embedding server is running") + # For recompute mode, try to find the passages file automatically + if self.is_pruned and not passages_file: + potential_passages_file = self.index_dir / f"{self.index_prefix}.passages.json" + print(f"DEBUG: Checking for passages file at: {potential_passages_file}") + if potential_passages_file.exists(): + passages_file = str(potential_passages_file) + print(f"INFO: Found passages file for recompute mode: {passages_file}") + else: + print(f"WARNING: No passages file found for recompute mode at {potential_passages_file}") + + # If index is pruned (embeddings removed), we MUST start embedding server for recompute + if self.is_pruned: + print(f"INFO: Index is pruned - starting embedding server for recompute") - if not self.embedding_server_manager.start_server(zmq_port, embedding_model, passages_file): - print(f"WARNING: Failed to start HNSW embedding server, falling back to standard search") - kwargs['recompute_neighbor_embeddings'] = False + # CRITICAL: Check passages file exists - fail fast if not + if not passages_file: + raise RuntimeError(f"FATAL: Index is pruned but no passages file found. 
Cannot proceed with recompute mode.") + + # Check if server is already running first + if _check_port(zmq_port): + print(f"INFO: Embedding server already running on port {zmq_port}") + else: + if not self.embedding_server_manager.start_server(zmq_port, embedding_model, passages_file): + raise RuntimeError(f"Failed to start HNSW embedding server on port {zmq_port}") + + # Give server extra time to fully initialize + print(f"INFO: Waiting for embedding server to fully initialize...") + time.sleep(3) + + # Final verification + if not _check_port(zmq_port): + raise RuntimeError(f"Embedding server failed to start listening on port {zmq_port}") + else: + print(f"INFO: Index has embeddings stored - no recompute needed") if query.dtype != np.float32: query = query.astype(np.float32) @@ -299,23 +518,14 @@ class HNSWSearcher(LeannBackendSearcherInterface): distances = np.empty((batch_size, top_k), dtype=np.float32) labels = np.empty((batch_size, top_k), dtype=np.int64) - if recompute_neighbor_embeddings: - # Use custom search with recompute - # This would require implementing custom HNSW search logic - # For now, we'll fall back to standard search - print("WARNING: Recompute functionality for HNSW not yet implemented, using standard search") - self._index.search(query.shape[0], faiss.swig_ptr(query), top_k, faiss.swig_ptr(distances), faiss.swig_ptr(labels)) - else: - # Standard FAISS search using SWIG API - self._index.search(query.shape[0], faiss.swig_ptr(query), top_k, faiss.swig_ptr(distances), faiss.swig_ptr(labels)) + # Use standard FAISS search - recompute is handled internally by FAISS + self._index.search(query.shape[0], faiss.swig_ptr(query), top_k, faiss.swig_ptr(distances), faiss.swig_ptr(labels)) return {"labels": labels, "distances": distances} except Exception as e: print(f"💥 ERROR: HNSW search failed. Exception: {e}") - batch_size = query.shape[0] - return {"labels": np.full((batch_size, top_k), -1, dtype=np.int64), - "distances": np.full((batch_size, top_k), float('inf'), dtype=np.float32)} + raise def __del__(self): if hasattr(self, 'embedding_server_manager'): diff --git a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py index 4c1aee0..c7d3005 100644 --- a/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py +++ b/packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_embedding_server.py @@ -101,7 +101,9 @@ def create_hnsw_embedding_server( model_name: Transformer model name custom_max_length_param: Custom max sequence length """ + print(f"Loading tokenizer for {model_name}...") tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True) + print(f"Tokenizer loaded successfully!") # Device setup mps_available = hasattr(torch.backends, 'mps') and torch.backends.mps.is_available() @@ -122,7 +124,9 @@ def create_hnsw_embedding_server( # Load model to the appropriate device print(f"Starting HNSW server on port {zmq_port} with model {model_name}") + print(f"Loading model {model_name}... 
(this may take a while if downloading)") model = AutoModel.from_pretrained(model_name).to(device).eval() + print(f"Model {model_name} loaded successfully!") # Check port availability import socket @@ -364,13 +368,14 @@ def create_hnsw_embedding_server( missing_ids = [] with lookup_timer.timing(): for nid in node_ids: - txtinfo = passages[nid] - if txtinfo is None or txtinfo["text"] == "": - print(f"Warning: Passage with ID {nid} not found") - missing_ids.append(nid) - txt = "" - else: - txt = txtinfo["text"] + try: + txtinfo = passages[nid] + if txtinfo is None or txtinfo["text"] == "": + raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast") + else: + txt = txtinfo["text"] + except (KeyError, IndexError): + raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast") texts.append(txt) lookup_timer.print_elapsed() @@ -450,13 +455,14 @@ def create_hnsw_embedding_server( missing_ids = [] with lookup_timer.timing(): for nid in node_ids: - txtinfo = passages[nid] - if txtinfo is None or txtinfo["text"] == "": - print(f"Warning: Passage with ID {nid} not found") - missing_ids.append(nid) - txt = "" - else: - txt = txtinfo["text"] + try: + txtinfo = passages[nid] + if txtinfo is None or txtinfo["text"] == "": + raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast") + else: + txt = txtinfo["text"] + except (KeyError, IndexError): + raise RuntimeError(f"FATAL: Passage with ID {nid} not found - failing fast") texts.append(txt) lookup_timer.print_elapsed() diff --git a/packages/leann-core/src/leann/api.py b/packages/leann-core/src/leann/api.py index ce6b50e..9eb6a01 100644 --- a/packages/leann-core/src/leann/api.py +++ b/packages/leann-core/src/leann/api.py @@ -5,56 +5,50 @@ import numpy as np import os import json from pathlib import Path -import openai # Import openai library +import openai -# 一个辅助函数,用于临时计算 embedding def _compute_embeddings(chunks: List[str], model_name: str) -> np.ndarray: - try: - from sentence_transformers import SentenceTransformer - model = SentenceTransformer(model_name) - print(f"INFO: Computing embeddings for {len(chunks)} chunks using '{model_name}'...") - embeddings = model.encode(chunks, show_progress_bar=True) - return np.asarray(embeddings, dtype=np.float32) - except ImportError: - print("WARNING: sentence-transformers not installed. Falling back to random embeddings.") - # 如果没有安装,则生成随机向量用于测试 - # TODO: 应该从一个固定的地方获取维度信息 - return np.random.rand(len(chunks), 768).astype(np.float32) + from sentence_transformers import SentenceTransformer + # TODO: use a better embedding model + model = SentenceTransformer(model_name) + print(f"INFO: Computing embeddings for {len(chunks)} chunks using '{model_name}'...") + embeddings = model.encode(chunks, show_progress_bar=True) + return np.asarray(embeddings, dtype=np.float32) class LeannBuilder: """ - 负责构建 Leann 索引的上层 API。 - 它协调 embedding 计算和后端索引构建。 + The builder is responsible for building the index, it will compute the embeddings and then build the index. + It will also save the metadata of the index. 
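+    The metadata (backend_kwargs and the chunk texts) is saved next to the index in a '.meta.json' file.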
""" def __init__(self, backend_name: str, embedding_model: str = "sentence-transformers/all-mpnet-base-v2", **backend_kwargs): self.backend_name = backend_name - self.backend_factory = BACKEND_REGISTRY.get(backend_name) - if self.backend_factory is None: + backend_factory: LeannBackendFactoryInterface | None = BACKEND_REGISTRY.get(backend_name) + if backend_factory is None: raise ValueError(f"Backend '{backend_name}' not found or not registered.") - + self.backend_factory = backend_factory + self.embedding_model = embedding_model self.backend_kwargs = backend_kwargs self.chunks: List[Dict[str, Any]] = [] print(f"INFO: LeannBuilder initialized with '{backend_name}' backend.") def add_text(self, text: str, metadata: Optional[Dict[str, Any]] = None): - # 简单的分块逻辑 self.chunks.append({"text": text, "metadata": metadata or {}}) def build_index(self, index_path: str): if not self.chunks: raise ValueError("No chunks added. Use add_text() first.") - # 1. 计算 embedding (这是 leann-core 的职责) texts_to_embed = [c["text"] for c in self.chunks] embeddings = _compute_embeddings(texts_to_embed, self.embedding_model) - # 2. 创建 builder 实例并构建索引 builder_instance = self.backend_factory.builder(**self.backend_kwargs) - builder_instance.build(embeddings, index_path, **self.backend_kwargs) + # Pass chunks data for passages file generation + build_kwargs = self.backend_kwargs.copy() + build_kwargs['chunks'] = self.chunks + builder_instance.build(embeddings, index_path, **build_kwargs) - # 3. 保存 leann 特有的元数据(不包含向量) index_dir = Path(index_path).parent leann_meta_path = index_dir / f"{Path(index_path).name}.meta.json" @@ -62,6 +56,7 @@ class LeannBuilder: "version": "0.1.0", "backend_name": self.backend_name, "embedding_model": self.embedding_model, + "backend_kwargs": self.backend_kwargs, "num_chunks": len(self.chunks), "chunks": self.chunks, } @@ -72,7 +67,8 @@ class LeannBuilder: class LeannSearcher: """ - 负责加载索引并执行检索的上层 API。 + The searcher is responsible for loading the index and performing the search. + It will also load the metadata of the index. """ def __init__(self, index_path: str, **backend_kwargs): leann_meta_path = Path(index_path).parent / f"{Path(index_path).name}.meta.json" @@ -89,17 +85,17 @@ class LeannSearcher: if backend_factory is None: raise ValueError(f"Backend '{backend_name}' (from index file) not found or not registered.") - # 创建 searcher 实例 - self.backend_impl = backend_factory.searcher(index_path, **backend_kwargs) + final_kwargs = self.meta_data.get("backend_kwargs", {}) + final_kwargs.update(backend_kwargs) + + self.backend_impl = backend_factory.searcher(index_path, **final_kwargs) print(f"INFO: LeannSearcher initialized with '{backend_name}' backend using index '{index_path}'.") def search(self, query: str, top_k: int = 5, **search_kwargs): query_embedding = _compute_embeddings([query], self.embedding_model) - # 委托给后端的 search 方法 results = self.backend_impl.search(query_embedding, top_k, **search_kwargs) - # 丰富返回结果,加入原始文本和元数据 enriched_results = [] for label, dist in zip(results['labels'][0], results['distances'][0]): if label < len(self.meta_data['chunks']): @@ -115,10 +111,10 @@ class LeannSearcher: class LeannChat: """ - 封装了 Searcher 和 LLM 的对话式 RAG 接口。 + The chat is responsible for the conversation with the LLM. + It will use the searcher to get the results and then use the LLM to generate the response. 
""" def __init__(self, index_path: str, backend_name: Optional[str] = None, llm_model: str = "gpt-4o", **kwargs): - # 如果用户没有指定后端,尝试从索引元数据中读取 if backend_name is None: leann_meta_path = Path(index_path).parent / f"{Path(index_path).name}.meta.json" if not leann_meta_path.exists(): @@ -171,10 +167,8 @@ class LeannChat: results = self.searcher.search(question, top_k=top_k, **kwargs) context = "\n\n".join([r['text'] for r in results]) - # 2. 构建 Prompt prompt = f"Context:\n{context}\n\nQuestion: {question}\n\nAnswer:" - # 3. 调用 LLM print(f"DEBUG: Calling LLM with prompt: {prompt}...") try: client = self._get_openai_client() diff --git a/packages/leann-core/src/leann/registry.py b/packages/leann-core/src/leann/registry.py index e9f54e0..1e6bc72 100644 --- a/packages/leann-core/src/leann/registry.py +++ b/packages/leann-core/src/leann/registry.py @@ -1,10 +1,13 @@ # packages/leann-core/src/leann/registry.py -# 全局的后端注册表字典 -BACKEND_REGISTRY = {} +from typing import Dict, TYPE_CHECKING +if TYPE_CHECKING: + from leann.interface import LeannBackendFactoryInterface + +BACKEND_REGISTRY: Dict[str, 'LeannBackendFactoryInterface'] = {} def register_backend(name: str): - """一个用于注册新后端类的装饰器。""" + """A decorator to register a new backend class.""" def decorator(cls): print(f"INFO: Registering backend '{name}'") BACKEND_REGISTRY[name] = cls diff --git a/pyproject.toml b/pyproject.toml index f6b9b49..2ce90a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,8 @@ dependencies = [ "llama-index>=0.12.44", "llama-index-readers-docling", "llama-index-node-parser-docling", - "ipykernel==6.29.5" + "ipykernel==6.29.5", + "msgpack>=1.1.1", ] [project.optional-dependencies] diff --git a/uv.lock b/uv.lock index 0f93006..a0aad1b 100644 --- a/uv.lock +++ b/uv.lock @@ -2070,6 +2070,7 @@ dependencies = [ { name = "llama-index" }, { name = "llama-index-node-parser-docling" }, { name = "llama-index-readers-docling" }, + { name = "msgpack" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, { name = "ollama" }, @@ -2109,6 +2110,7 @@ requires-dist = [ { name = "llama-index-node-parser-docling" }, { name = "llama-index-readers-docling" }, { name = "matplotlib", marker = "extra == 'dev'" }, + { name = "msgpack", specifier = ">=1.1.1" }, { name = "numpy", specifier = ">=1.26.0" }, { name = "ollama" }, { name = "openai", specifier = ">=1.0.0" }, @@ -2730,13 +2732,13 @@ name = "mlx-lm" version = "0.25.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "jinja2" }, - { name = "mlx" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "protobuf" }, - { name = "pyyaml" }, - { name = "transformers", extra = ["sentencepiece"] }, + { name = "jinja2", marker = "sys_platform == 'darwin'" }, + { name = "mlx", marker = "sys_platform == 'darwin'" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11' and sys_platform == 'darwin'" }, + { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11' and sys_platform == 'darwin'" }, + { 
name = "protobuf", marker = "sys_platform == 'darwin'" }, + { name = "pyyaml", marker = "sys_platform == 'darwin'" }, + { name = "transformers", extra = ["sentencepiece"], marker = "sys_platform == 'darwin'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/52/34/d2b551d4519a9bafdfd29f76987dbcaaee370b974cfa81acfba782d6063f/mlx_lm-0.25.2.tar.gz", hash = "sha256:7d01baa66916aabd5be7345786acbfaf01d4e3f646759d7232e14aebfd4420a8", size = 146815 } wheels = [ @@ -2786,6 +2788,54 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 }, ] +[[package]] +name = "msgpack" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/52/f30da112c1dc92cf64f57d08a273ac771e7b29dea10b4b30369b2d7e8546/msgpack-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:353b6fc0c36fde68b661a12949d7d49f8f51ff5fa019c1e47c87c4ff34b080ed", size = 81799 }, + { url = "https://files.pythonhosted.org/packages/e4/35/7bfc0def2f04ab4145f7f108e3563f9b4abae4ab0ed78a61f350518cc4d2/msgpack-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79c408fcf76a958491b4e3b103d1c417044544b68e96d06432a189b43d1215c8", size = 78278 }, + { url = "https://files.pythonhosted.org/packages/e8/c5/df5d6c1c39856bc55f800bf82778fd4c11370667f9b9e9d51b2f5da88f20/msgpack-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78426096939c2c7482bf31ef15ca219a9e24460289c00dd0b94411040bb73ad2", size = 402805 }, + { url = "https://files.pythonhosted.org/packages/20/8e/0bb8c977efecfe6ea7116e2ed73a78a8d32a947f94d272586cf02a9757db/msgpack-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b17ba27727a36cb73aabacaa44b13090feb88a01d012c0f4be70c00f75048b4", size = 408642 }, + { url = "https://files.pythonhosted.org/packages/59/a1/731d52c1aeec52006be6d1f8027c49fdc2cfc3ab7cbe7c28335b2910d7b6/msgpack-1.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a17ac1ea6ec3c7687d70201cfda3b1e8061466f28f686c24f627cae4ea8efd0", size = 395143 }, + { url = "https://files.pythonhosted.org/packages/2b/92/b42911c52cda2ba67a6418ffa7d08969edf2e760b09015593c8a8a27a97d/msgpack-1.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:88d1e966c9235c1d4e2afac21ca83933ba59537e2e2727a999bf3f515ca2af26", size = 395986 }, + { url = "https://files.pythonhosted.org/packages/61/dc/8ae165337e70118d4dab651b8b562dd5066dd1e6dd57b038f32ebc3e2f07/msgpack-1.1.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6d58656842e1b2ddbe07f43f56b10a60f2ba5826164910968f5933e5178af75", size = 402682 }, + { url = "https://files.pythonhosted.org/packages/58/27/555851cb98dcbd6ce041df1eacb25ac30646575e9cd125681aa2f4b1b6f1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96decdfc4adcbc087f5ea7ebdcfd3dee9a13358cae6e81d54be962efc38f6338", size = 406368 }, + { url = "https://files.pythonhosted.org/packages/d4/64/39a26add4ce16f24e99eabb9005e44c663db00e3fce17d4ae1ae9d61df99/msgpack-1.1.1-cp310-cp310-win32.whl", hash = 
"sha256:6640fd979ca9a212e4bcdf6eb74051ade2c690b862b679bfcb60ae46e6dc4bfd", size = 65004 }, + { url = "https://files.pythonhosted.org/packages/7d/18/73dfa3e9d5d7450d39debde5b0d848139f7de23bd637a4506e36c9800fd6/msgpack-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8b65b53204fe1bd037c40c4148d00ef918eb2108d24c9aaa20bc31f9810ce0a8", size = 71548 }, + { url = "https://files.pythonhosted.org/packages/7f/83/97f24bf9848af23fe2ba04380388216defc49a8af6da0c28cc636d722502/msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558", size = 82728 }, + { url = "https://files.pythonhosted.org/packages/aa/7f/2eaa388267a78401f6e182662b08a588ef4f3de6f0eab1ec09736a7aaa2b/msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d", size = 79279 }, + { url = "https://files.pythonhosted.org/packages/f8/46/31eb60f4452c96161e4dfd26dbca562b4ec68c72e4ad07d9566d7ea35e8a/msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0", size = 423859 }, + { url = "https://files.pythonhosted.org/packages/45/16/a20fa8c32825cc7ae8457fab45670c7a8996d7746ce80ce41cc51e3b2bd7/msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f", size = 429975 }, + { url = "https://files.pythonhosted.org/packages/86/ea/6c958e07692367feeb1a1594d35e22b62f7f476f3c568b002a5ea09d443d/msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704", size = 413528 }, + { url = "https://files.pythonhosted.org/packages/75/05/ac84063c5dae79722bda9f68b878dc31fc3059adb8633c79f1e82c2cd946/msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2", size = 413338 }, + { url = "https://files.pythonhosted.org/packages/69/e8/fe86b082c781d3e1c09ca0f4dacd457ede60a13119b6ce939efe2ea77b76/msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2", size = 422658 }, + { url = "https://files.pythonhosted.org/packages/3b/2b/bafc9924df52d8f3bb7c00d24e57be477f4d0f967c0a31ef5e2225e035c7/msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752", size = 427124 }, + { url = "https://files.pythonhosted.org/packages/a2/3b/1f717e17e53e0ed0b68fa59e9188f3f610c79d7151f0e52ff3cd8eb6b2dc/msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295", size = 65016 }, + { url = "https://files.pythonhosted.org/packages/48/45/9d1780768d3b249accecc5a38c725eb1e203d44a191f7b7ff1941f7df60c/msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458", size = 72267 }, + { url = "https://files.pythonhosted.org/packages/e3/26/389b9c593eda2b8551b2e7126ad3a06af6f9b44274eb3a4f054d48ff7e47/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238", size = 82359 }, + { url = "https://files.pythonhosted.org/packages/ab/65/7d1de38c8a22cf8b1551469159d4b6cf49be2126adc2482de50976084d78/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157", size = 79172 }, + { url = "https://files.pythonhosted.org/packages/0f/bd/cacf208b64d9577a62c74b677e1ada005caa9b69a05a599889d6fc2ab20a/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce", size = 425013 }, + { url = "https://files.pythonhosted.org/packages/4d/ec/fd869e2567cc9c01278a736cfd1697941ba0d4b81a43e0aa2e8d71dab208/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a", size = 426905 }, + { url = "https://files.pythonhosted.org/packages/55/2a/35860f33229075bce803a5593d046d8b489d7ba2fc85701e714fc1aaf898/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c", size = 407336 }, + { url = "https://files.pythonhosted.org/packages/8c/16/69ed8f3ada150bf92745fb4921bd621fd2cdf5a42e25eb50bcc57a5328f0/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b", size = 409485 }, + { url = "https://files.pythonhosted.org/packages/c6/b6/0c398039e4c6d0b2e37c61d7e0e9d13439f91f780686deb8ee64ecf1ae71/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef", size = 412182 }, + { url = "https://files.pythonhosted.org/packages/b8/d0/0cf4a6ecb9bc960d624c93effaeaae75cbf00b3bc4a54f35c8507273cda1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a", size = 419883 }, + { url = "https://files.pythonhosted.org/packages/62/83/9697c211720fa71a2dfb632cad6196a8af3abea56eece220fde4674dc44b/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c", size = 65406 }, + { url = "https://files.pythonhosted.org/packages/c0/23/0abb886e80eab08f5e8c485d6f13924028602829f63b8f5fa25a06636628/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4", size = 72558 }, + { url = "https://files.pythonhosted.org/packages/a1/38/561f01cf3577430b59b340b51329803d3a5bf6a45864a55f4ef308ac11e3/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0", size = 81677 }, + { url = "https://files.pythonhosted.org/packages/09/48/54a89579ea36b6ae0ee001cba8c61f776451fad3c9306cd80f5b5c55be87/msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9", size = 78603 }, + { url = "https://files.pythonhosted.org/packages/a0/60/daba2699b308e95ae792cdc2ef092a38eb5ee422f9d2fbd4101526d8a210/msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8", size = 420504 }, + { url = "https://files.pythonhosted.org/packages/20/22/2ebae7ae43cd8f2debc35c631172ddf14e2a87ffcc04cf43ff9df9fff0d3/msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a", size = 423749 }, + { url = 
"https://files.pythonhosted.org/packages/40/1b/54c08dd5452427e1179a40b4b607e37e2664bca1c790c60c442c8e972e47/msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac", size = 404458 }, + { url = "https://files.pythonhosted.org/packages/2e/60/6bb17e9ffb080616a51f09928fdd5cac1353c9becc6c4a8abd4e57269a16/msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b", size = 405976 }, + { url = "https://files.pythonhosted.org/packages/ee/97/88983e266572e8707c1f4b99c8fd04f9eb97b43f2db40e3172d87d8642db/msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7", size = 408607 }, + { url = "https://files.pythonhosted.org/packages/bc/66/36c78af2efaffcc15a5a61ae0df53a1d025f2680122e2a9eb8442fed3ae4/msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5", size = 424172 }, + { url = "https://files.pythonhosted.org/packages/8c/87/a75eb622b555708fe0427fab96056d39d4c9892b0c784b3a721088c7ee37/msgpack-1.1.1-cp313-cp313-win32.whl", hash = "sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323", size = 65347 }, + { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341 }, +] + [[package]] name = "msgspec" version = "0.19.0" @@ -3220,7 +3270,7 @@ name = "nvidia-cudnn-cu12" version = "9.5.1.17" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/2a/78/4535c9c7f859a64781e43c969a3a7e84c54634e319a996d43ef32ce46f83/nvidia_cudnn_cu12-9.5.1.17-py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:30ac3869f6db17d170e0e556dd6cc5eee02647abc31ca856634d5a40f82c15b2", size = 570988386 }, @@ -3231,7 +3281,7 @@ name = "nvidia-cufft-cu12" version = "11.3.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/8f/16/73727675941ab8e6ffd86ca3a4b7b47065edcca7a997920b831f8147c99d/nvidia_cufft_cu12-11.3.0.4-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:ccba62eb9cef5559abd5e0d54ceed2d9934030f51163df018532142a8ec533e5", size = 200221632 }, @@ -3260,9 +3310,9 @@ name = "nvidia-cusolver-cu12" version = "11.7.1.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-cublas-cu12" }, - { name = "nvidia-cusparse-cu12" }, - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-cublas-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "nvidia-cusparse-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 
'linux')" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/f0/6e/c2cf12c9ff8b872e92b4a5740701e51ff17689c4d726fca91875b07f655d/nvidia_cusolver_cu12-11.7.1.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e9e49843a7707e42022babb9bcfa33c29857a93b88020c4e4434656a655b698c", size = 158229790 }, @@ -3274,7 +3324,7 @@ name = "nvidia-cusparse-cu12" version = "12.5.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "nvidia-nvjitlink-cu12" }, + { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/06/1e/b8b7c2f4099a37b96af5c9bb158632ea9e5d9d27d7391d7eb8fc45236674/nvidia_cusparse_cu12-12.5.4.2-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7556d9eca156e18184b94947ade0fba5bb47d69cec46bf8660fd2c71a4b48b73", size = 216561367 }, @@ -5785,8 +5835,8 @@ wheels = [ [package.optional-dependencies] sentencepiece = [ - { name = "protobuf" }, - { name = "sentencepiece" }, + { name = "protobuf", marker = "sys_platform == 'darwin'" }, + { name = "sentencepiece", marker = "sys_platform == 'darwin'" }, ] [[package]] @@ -5794,7 +5844,7 @@ name = "triton" version = "3.3.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "setuptools" }, + { name = "setuptools", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ { url = "https://files.pythonhosted.org/packages/8d/a9/549e51e9b1b2c9b854fd761a1d23df0ba2fbc60bd0c13b489ffa518cfcb7/triton-3.3.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b74db445b1c562844d3cfad6e9679c72e93fdfb1a90a24052b03bb5c49d1242e", size = 155600257 },