chore: sync changes; fix Ruff import order; update examples, benchmarks, and dependencies

- Fix import order in packages/leann-backend-hnsw/leann_backend_hnsw/hnsw_backend.py (Ruff I001)

- Update benchmarks/run_evaluation.py: pass `complexity` instead of `ef` to `searcher.search`, and add optional per-query LLM calls behind new `--llm-type`/`--llm-model` flags

- Update apps/base_rag_example.py (drop the hard-coded `system_prompt`) and leann-core API usage: `LeannChat` can now reuse an existing `LeannSearcher`, and timing logs are re-enabled

- Add benchmarks/data/README.md

- Update uv.lock

- Misc cleanup

- Note: paru-bin was added as an embedded git repo; if unintended, untrack it with `git rm --cached paru-bin` or convert it to a proper submodule (see the sketch below)
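
A possible cleanup, assuming the repo should become a proper submodule (the AUR URL below is an assumption):

```bash
# Untrack the embedded repo but keep the working-tree copy.
git rm --cached paru-bin

# Re-add it as a proper submodule; the URL is an assumption.
git submodule add https://aur.archlinux.org/paru-bin.git paru-bin
git commit -m "chore: track paru-bin as a submodule"
```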
Author: yichuan-w
Date: 2025-08-18 15:49:16 -07:00
Parent: be405a5851
Commit: 0d232021f9
6 changed files with 3630 additions and 3774 deletions

apps/base_rag_example.py

```diff
@@ -268,7 +268,6 @@ class BaseRAGExample(ABC):
         chat = LeannChat(
             index_path,
             llm_config=self.get_llm_config(args),
-            system_prompt=f"You are a helpful assistant that answers questions about {self.name} data.",
             complexity=args.search_complexity,
         )
```

benchmarks/data/README.md (new executable file, 44 lines added)

---
license: mit
---
# LEANN-RAG Evaluation Data
This repository contains the necessary data to run the recall evaluation scripts for the [LEANN-RAG](https://huggingface.co/LEANN-RAG) project.
## Dataset Components
This dataset is structured into three main parts:
1. **Pre-built LEANN Indices**:
* `dpr/`: A pre-built index for the DPR dataset.
* `rpj_wiki/`: A pre-built index for the RPJ-Wiki dataset.
These indices were created using the `leann-core` library and are required by the `LeannSearcher`.
2. **Ground Truth Data**:
* `ground_truth/`: Contains the ground truth files (`flat_results_nq_k3.json`) for both the DPR and RPJ-Wiki datasets. These files map queries to the original passage IDs from the Natural Questions benchmark, evaluated using the Contriever model.
3. **Queries**:
* `queries/`: Contains the `nq_open.jsonl` file with the Natural Questions queries used for the evaluation.
## Usage
To use this data, you can download it locally using the `huggingface-hub` library. First, install the library:
```bash
pip install huggingface-hub
```
Then, you can download the entire dataset to a local directory (e.g., `data/`) with the following Python script:
```python
from huggingface_hub import snapshot_download
snapshot_download(
repo_id="LEANN-RAG/leann-rag-evaluation-data",
repo_type="dataset",
local_dir="data"
)
```
This will download all the necessary files into a local `data` folder, preserving the repository structure. The evaluation scripts in the main [LEANN-RAG Space](https://huggingface.co/LEANN-RAG) are configured to work with this data structure.
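
As a quick sanity check, the downloaded queries can be loaded with a few lines of Python (a minimal sketch; the exact field names inside `nq_open.jsonl` are an assumption):

```python
import json
from pathlib import Path

# Read the Natural Questions queries downloaded above.
records = [
    json.loads(line)
    for line in Path("data/queries/nq_open.jsonl").read_text().splitlines()
    if line.strip()
]

# NOTE: the field names in each record are an assumption about the JSONL schema.
print(f"Loaded {len(records)} queries; first record keys: {list(records[0].keys())}")
```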

benchmarks/run_evaluation.py

```diff
@@ -12,7 +12,7 @@ import time
 from pathlib import Path
 
 import numpy as np
 
-from leann.api import LeannBuilder, LeannSearcher
+from leann.api import LeannBuilder, LeannChat, LeannSearcher
 
 def download_data_if_needed(data_root: Path, download_embeddings: bool = False):
@@ -197,6 +197,19 @@ def main():
     parser.add_argument(
         "--ef-search", type=int, default=120, help="The 'efSearch' parameter for HNSW."
     )
+    parser.add_argument(
+        "--llm-type",
+        type=str,
+        choices=["ollama", "hf", "openai", "gemini", "simulated"],
+        default="ollama",
+        help="LLM backend type to optionally query during evaluation (default: ollama)",
+    )
+    parser.add_argument(
+        "--llm-model",
+        type=str,
+        default="qwen3:1.7b",
+        help="LLM model identifier for the chosen backend (default: qwen3:1.7b)",
+    )
     args = parser.parse_args()
 
     # --- Path Configuration ---
@@ -318,9 +331,14 @@
     for i in range(num_eval_queries):
         start_time = time.time()
-        new_results = searcher.search(queries[i], top_k=args.top_k, ef=args.ef_search)
+        new_results = searcher.search(queries[i], top_k=args.top_k, complexity=args.ef_search)
         search_times.append(time.time() - start_time)
 
+        # Optional: also call the LLM with configurable backend/model (does not affect recall)
+        llm_config = {"type": args.llm_type, "model": args.llm_model}
+        chat = LeannChat(args.index_path, llm_config=llm_config, searcher=searcher)
+        answer = chat.ask(queries[i], top_k=args.top_k, complexity=args.ef_search)
+        print(f"Answer: {answer}")
+
         # Correct Recall Calculation: Based on TEXT content
         new_texts = {result.text for result in new_results}
```
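
With the new flags, the evaluation can optionally exercise an LLM per query. A sketch of an invocation; only `--ef-search`, `--llm-type`, and `--llm-model` are confirmed by this diff, and any other flags the script requires are omitted:

```bash
# Recall evaluation plus the optional per-query LLM pass (defaults shown explicitly).
python benchmarks/run_evaluation.py \
    --ef-search 120 \
    --llm-type ollama \
    --llm-model qwen3:1.7b
```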

leann/api.py (leann-core)

```diff
@@ -613,9 +613,9 @@ class LeannSearcher:
             use_server_if_available=recompute_embeddings,
             zmq_port=zmq_port,
         )
-        # logger.info(f"  Generated embedding shape: {query_embedding.shape}")
-        # time.time() - start_time
-        # logger.info(f"  Embedding time: {embedding_time} seconds")
+        logger.info(f"  Generated embedding shape: {query_embedding.shape}")
+        embedding_time = time.time() - start_time
+        logger.info(f"  Embedding time: {embedding_time} seconds")
 
         start_time = time.time()
         results = self.backend_impl.search(
@@ -629,7 +629,8 @@ class LeannSearcher:
             zmq_port=zmq_port,
             **kwargs,
         )
-        # logger.info(f"  Search time: {search_time} seconds")
+        search_time = time.time() - start_time
+        logger.info(f"  Search time in search() LEANN searcher: {search_time} seconds")
         logger.info(f"  Backend returned: labels={len(results.get('labels', [[]])[0])} results")
 
         enriched_results = []
@@ -708,9 +709,15 @@ class LeannChat:
         index_path: str,
         llm_config: Optional[dict[str, Any]] = None,
         enable_warmup: bool = False,
+        searcher: Optional[LeannSearcher] = None,
         **kwargs,
     ):
-        self.searcher = LeannSearcher(index_path, enable_warmup=enable_warmup, **kwargs)
+        if searcher is None:
+            self.searcher = LeannSearcher(index_path, enable_warmup=enable_warmup, **kwargs)
+            self._owns_searcher = True
+        else:
+            self.searcher = searcher
+            self._owns_searcher = False
         self.llm = get_llm(llm_config)
 
     def ask(
@@ -741,7 +748,7 @@ class LeannChat:
             **search_kwargs,
         )
         search_time = time.time() - search_time
-        # logger.info(f"  Search time: {search_time} seconds")
+        logger.info(f"  Search time: {search_time} seconds")
         context = "\n\n".join([r.text for r in results])
         prompt = (
             "Here is some retrieved context that might help answer your question:\n\n"
@@ -777,7 +784,9 @@ class LeannChat:
         This method should be called after you're done using the chat interface,
         especially in test environments or batch processing scenarios.
         """
-        if hasattr(self.searcher, "cleanup"):
+        # Only stop the embedding server if this LeannChat instance created the searcher.
+        # When a shared searcher is passed in, avoid shutting down the server to enable reuse.
+        if getattr(self, "_owns_searcher", False) and hasattr(self.searcher, "cleanup"):
             self.searcher.cleanup()
 
     # Enable automatic cleanup patterns
```

paru-bin (submodule entry, 1 line)

Submodule paru-bin added at 92a55429af

uv.lock (generated, 7313 lines)

File diff suppressed because it is too large.