#!/usr/bin/env fish # Set default parameters set domain "rpj_wiki" set embedder "facebook/contriever-msmarco" set k 5 set tasks "nq" "trivia" "hotpot" "gpqa" # Parse command line arguments for i in (seq 1 (count $argv)) switch $argv[$i] case "--domain" set domain $argv[(math $i + 1)] case "--embedder" set embedder $argv[(math $i + 1)] case "--k" set k $argv[(math $i + 1)] case "--tasks" set j (math $i + 1) set tasks while test $j -le (count $argv) && not string match -q -- "--*" $argv[$j] set -a tasks $argv[$j] set j (math $j + 1) end end end echo "Running with the following parameters:" echo "Domain: $domain" echo "Embedder: $embedder" echo "k: $k" echo "Datasets: $tasks" # Create directory for results set results_dir "retrieval_results" mkdir -p $results_dir # Process each dataset using retrieval_demo directly for task in $tasks echo "" echo "===== Processing dataset: $task =====" # Step 1: Run retrieval_demo with flat index to generate cache and get results echo "Running retrieval for $task..." echo "python demo/main.py --domain $domain --task $task --search --load flat --lazy" python demo/main.py --domain $domain --task $task --search --load flat --lazy # Check if successful if test $status -ne 0 echo "Retrieval for $task failed" continue end echo "Completed processing for $task" echo "--------------------------------" end echo "All operations completed successfully!" echo "The cache files have been created at the locations specified by get_flat_cache_path() in config.py" echo "You can now use test_all_datasets.py to view the results"