change rebuild logic

This commit is contained in:
yichuan520030910320
2025-08-03 20:54:52 -07:00
parent 7ad2ec39d6
commit baf70dc411
2 changed files with 5 additions and 6 deletions

View File

@@ -179,7 +179,7 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
# Core Parameters (General preprocessing for all examples)
--index-dir DIR # Directory to store the index (default: current directory)
--query "YOUR QUESTION" # Single query mode. Omit for interactive chat (type 'quit' to exit)
--max-items N # Limit data preprocessing (default: 1000 items, use -1 to process all data)
--max-items N # Limit data preprocessing (default: -1, process all data)
--force-rebuild # Force rebuild index even if it exists
# Embedding Parameters

View File

@@ -50,12 +50,12 @@ class BaseRAGExample(ABC):
help="Query to run (if not provided, will run in interactive mode)",
)
# Allow subclasses to override default max_items
max_items_default = getattr(self, "max_items_default", 1000)
max_items_default = getattr(self, "max_items_default", -1)
core_group.add_argument(
"--max-items",
type=int,
default=max_items_default,
help=f"Maximum number of items to process (default: {max_items_default}, -1 for all)",
help="Maximum number of items to process (-1 indexes all documents). If you have a large dataset, set this to a reasonable number the first time you try it.",
)
core_group.add_argument(
"--force-rebuild", action="store_true", help="Force rebuild index even if it exists"
@@ -256,7 +256,7 @@ class BaseRAGExample(ABC):
# Check if index exists
index_path = str(Path(args.index_dir) / f"{self.default_index_name}.leann")
index_exists = Path(index_path).exists()
index_exists = Path(args.index_dir).exists()
if not index_exists or args.force_rebuild:
# Load data and build index
@@ -268,9 +268,8 @@ class BaseRAGExample(ABC):
return
index_path = await self.build_index(args, texts)
print(f"Index saved to: {index_path}")
else:
print(f"\nUsing existing index: {index_path}")
print(f"\nUsing existing index in {args.index_dir}")
# Run query or interactive mode
if args.query: