change rebuild logic
This commit is contained in:
@@ -179,7 +179,7 @@ All RAG examples share these common parameters. **Interactive mode** is availabl
|
|||||||
# Core Parameters (General preprocessing for all examples)
|
# Core Parameters (General preprocessing for all examples)
|
||||||
--index-dir DIR # Directory to store the index (default: current directory)
|
--index-dir DIR # Directory to store the index (default: current directory)
|
||||||
--query "YOUR QUESTION" # Single query mode. Omit for interactive chat (type 'quit' to exit)
|
--query "YOUR QUESTION" # Single query mode. Omit for interactive chat (type 'quit' to exit)
|
||||||
--max-items N # Limit data preprocessing (default: 1000 items, use -1 to process all data)
|
--max-items N # Limit data preprocessing (default: -1, process all data)
|
||||||
--force-rebuild # Force rebuild index even if it exists
|
--force-rebuild # Force rebuild index even if it exists
|
||||||
|
|
||||||
# Embedding Parameters
|
# Embedding Parameters
|
||||||
|
|||||||
@@ -50,12 +50,12 @@ class BaseRAGExample(ABC):
|
|||||||
help="Query to run (if not provided, will run in interactive mode)",
|
help="Query to run (if not provided, will run in interactive mode)",
|
||||||
)
|
)
|
||||||
# Allow subclasses to override default max_items
|
# Allow subclasses to override default max_items
|
||||||
max_items_default = getattr(self, "max_items_default", 1000)
|
max_items_default = getattr(self, "max_items_default", -1)
|
||||||
core_group.add_argument(
|
core_group.add_argument(
|
||||||
"--max-items",
|
"--max-items",
|
||||||
type=int,
|
type=int,
|
||||||
default=max_items_default,
|
default=max_items_default,
|
||||||
help=f"Maximum number of items to process (default: {max_items_default}, -1 for all)",
|
help="Maximum number of items to process (-1 for all). -1 means index all documents; if you have a large dataset and are trying this for the first time, you should set it to a reasonable number.",
|
||||||
)
|
)
|
||||||
core_group.add_argument(
|
core_group.add_argument(
|
||||||
"--force-rebuild", action="store_true", help="Force rebuild index even if it exists"
|
"--force-rebuild", action="store_true", help="Force rebuild index even if it exists"
|
||||||
@@ -256,7 +256,7 @@ class BaseRAGExample(ABC):
|
|||||||
|
|
||||||
# Check if index exists
|
# Check if index exists
|
||||||
index_path = str(Path(args.index_dir) / f"{self.default_index_name}.leann")
|
index_path = str(Path(args.index_dir) / f"{self.default_index_name}.leann")
|
||||||
index_exists = Path(index_path).exists()
|
index_exists = Path(args.index_dir).exists()
|
||||||
|
|
||||||
if not index_exists or args.force_rebuild:
|
if not index_exists or args.force_rebuild:
|
||||||
# Load data and build index
|
# Load data and build index
|
||||||
@@ -268,9 +268,8 @@ class BaseRAGExample(ABC):
|
|||||||
return
|
return
|
||||||
|
|
||||||
index_path = await self.build_index(args, texts)
|
index_path = await self.build_index(args, texts)
|
||||||
print(f"Index saved to: {index_path}")
|
|
||||||
else:
|
else:
|
||||||
print(f"\nUsing existing index: {index_path}")
|
print(f"\nUsing existing index in {args.index_dir}")
|
||||||
|
|
||||||
# Run query or interactive mode
|
# Run query or interactive mode
|
||||||
if args.query:
|
if args.query:
|
||||||
|
|||||||
Reference in New Issue
Block a user