Initial commit
This commit is contained in:
60
research/utils/generate_dataset_cache.fish
Normal file
60
research/utils/generate_dataset_cache.fish
Normal file
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env fish
#
# Run flat-index retrieval for a list of tasks so that the retrieval cache
# files are generated.
#
# Usage:
#   generate_dataset_cache.fish [--domain NAME] [--embedder MODEL] [--k N]
#                               [--tasks TASK [TASK ...]]
#
# For every task this runs, from the current working directory:
#   python demo/main.py --domain <domain> --task <task> --search --load flat --lazy
#
# Exit status:
#   0  every task completed
#   1  at least one task failed
#   2  malformed command line

# Set default parameters
set domain "rpj_wiki"
set embedder "facebook/contriever-msmarco"
set k 5
set tasks "nq" "trivia" "hotpot" "gpqa"

# Parse command line arguments.
# An explicit index loop is used instead of iterating over
# `seq 1 (count $argv)`: it lets consumed option values be skipped, and it
# never produces an index when $argv is empty (BSD `seq 1 0` counts down
# instead of printing nothing).
set argc (count $argv)
set i 1
while test $i -le $argc
    set opt $argv[$i]
    switch $opt
        case "--domain" "--embedder" "--k"
            set value_index (math $i + 1)
            if test $value_index -gt $argc
                echo "Error: $opt requires a value" >&2
                exit 2
            end
            switch $opt
                case "--domain"
                    set domain $argv[$value_index]
                case "--embedder"
                    set embedder $argv[$value_index]
                case "--k"
                    set k $argv[$value_index]
            end
            # Skip over the value that was just consumed.
            set i $value_index
        case "--tasks"
            # Collect every following argument up to the next "--option".
            set tasks
            set j (math $i + 1)
            while test $j -le $argc && not string match -q -- "--*" $argv[$j]
                set -a tasks $argv[$j]
                set j (math $j + 1)
            end
            if test (count $tasks) -eq 0
                echo "Error: --tasks requires at least one task name" >&2
                exit 2
            end
            # Resume parsing at the first argument that was not a task name.
            set i (math $j - 1)
        case '*'
            # Unknown arguments were silently ignored before; keep going but
            # tell the user so typos do not go unnoticed.
            echo "Warning: ignoring unrecognized argument '$opt'" >&2
    end
    set i (math $i + 1)
end

echo "Running with the following parameters:"
echo "Domain: $domain"
echo "Embedder: $embedder"
echo "k: $k"
echo "Datasets: $tasks"

# NOTE(review): $embedder and $k are parsed and reported above but are not
# forwarded to demo/main.py below -- confirm whether that is intended.

# Create directory for results
set results_dir "retrieval_results"
mkdir -p $results_dir

# Process each dataset using retrieval_demo directly
set failed_tasks
for task in $tasks
    echo ""
    echo "===== Processing dataset: $task ====="

    # Step 1: Run retrieval_demo with flat index to generate cache and get results
    echo "Running retrieval for $task..."
    echo "python demo/main.py --domain $domain --task $task --search --load flat --lazy"

    # A failing task must not abort the remaining ones; remember it so the
    # final summary and exit status reflect the failure.
    if not python demo/main.py --domain $domain --task $task --search --load flat --lazy
        echo "Retrieval for $task failed" >&2
        set -a failed_tasks $task
        continue
    end

    echo "Completed processing for $task"
    echo "--------------------------------"
end

echo ""

# Only claim success when every task actually succeeded.
if test (count $failed_tasks) -gt 0
    echo "Retrieval failed for: $failed_tasks" >&2
    exit 1
end

echo "All operations completed successfully!"
echo "The cache files have been created at the locations specified by get_flat_cache_path() in config.py"
echo "You can now use test_all_datasets.py to view the results"
|
||||
Reference in New Issue
Block a user