From baf70dc411bde4eac7e51673a89d06783fdfc482 Mon Sep 17 00:00:00 2001 From: yichuan520030910320 Date: Sun, 3 Aug 2025 20:54:52 -0700 Subject: [PATCH] change rebuild logic --- README.md | 2 +- examples/base_rag_example.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2dcc775..114912c 100755 --- a/README.md +++ b/README.md @@ -179,7 +179,7 @@ All RAG examples share these common parameters. **Interactive mode** is availabl # Core Parameters (General preprocessing for all examples) --index-dir DIR # Directory to store the index (default: current directory) --query "YOUR QUESTION" # Single query mode. Omit for interactive chat (type 'quit' to exit) ---max-items N # Limit data preprocessing (default: 1000 items, use -1 to process all data) +--max-items N # Limit data preprocessing (default: -1, process all data) --force-rebuild # Force rebuild index even if it exists # Embedding Parameters diff --git a/examples/base_rag_example.py b/examples/base_rag_example.py index 69a91ae..a164b3c 100644 --- a/examples/base_rag_example.py +++ b/examples/base_rag_example.py @@ -50,12 +50,12 @@ class BaseRAGExample(ABC): help="Query to run (if not provided, will run in interactive mode)", ) # Allow subclasses to override default max_items - max_items_default = getattr(self, "max_items_default", 1000) + max_items_default = getattr(self, "max_items_default", -1) core_group.add_argument( "--max-items", type=int, default=max_items_default, - help=f"Maximum number of items to process (default: {max_items_default}, -1 for all)", + help="Maximum number of items to process -1 for all, means index all documents, and you should set it to a reasonable number if you have a large dataset and try at the first time)", ) core_group.add_argument( "--force-rebuild", action="store_true", help="Force rebuild index even if it exists" @@ -256,7 +256,7 @@ class BaseRAGExample(ABC): # Check if index exists index_path = str(Path(args.index_dir) / f"{self.default_index_name}.leann") - index_exists = Path(index_path).exists() + index_exists = Path(args.index_dir).exists() if not index_exists or args.force_rebuild: # Load data and build index @@ -268,9 +268,8 @@ class BaseRAGExample(ABC): return index_path = await self.build_index(args, texts) - print(f"Index saved to: {index_path}") else: - print(f"\nUsing existing index: {index_path}") + print(f"\nUsing existing index in {args.index_dir}") # Run query or interactive mode if args.query: