Add ColQwen2.5 model support and improve model selection

- Add ColQwen2.5 and ColQwen2_5_Processor imports
- Implement smart model type detection for colqwen2, colqwen2.5, and colpali
- Add task name aliases for easier benchmark invocation
- Add safe model name handling for file paths and index naming
- Support custom model paths including LoRA adapters
- Improve model choice validation and error handling

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
yichuan-w
2025-12-05 11:35:30 +00:00
parent 76cc798e3e
commit aaadb00e44
2 changed files with 108 additions and 15 deletions

View File

@@ -152,20 +152,65 @@ def _select_device_and_dtype():
def _load_colvision(model_choice: str):
import torch
from colpali_engine.models import ColPali, ColQwen2, ColQwen2Processor
from colpali_engine.models import (
ColPali,
ColQwen2,
ColQwen2_5,
ColQwen2_5_Processor,
ColQwen2Processor,
)
from colpali_engine.models.paligemma.colpali.processing_colpali import ColPaliProcessor
from transformers.utils.import_utils import is_flash_attn_2_available
device_str, device, dtype = _select_device_and_dtype()
# Determine model name and type
# IMPORTANT: Check colqwen2.5 BEFORE colqwen2 to avoid false matches
model_choice_lower = model_choice.lower()
if model_choice == "colqwen2":
model_name = "vidore/colqwen2-v1.0"
# On CPU/MPS we must avoid flash-attn and stay eager; on CUDA prefer flash-attn if available
model_type = "colqwen2"
elif model_choice == "colqwen2.5" or model_choice == "colqwen25":
model_name = "vidore/colqwen2.5-v0.2"
model_type = "colqwen2.5"
elif model_choice == "colpali":
model_name = "vidore/colpali-v1.2"
model_type = "colpali"
elif (
"colqwen2.5" in model_choice_lower
or "colqwen25" in model_choice_lower
or "colqwen2_5" in model_choice_lower
):
# Handle HuggingFace model names like "vidore/colqwen2.5-v0.2"
model_name = model_choice
model_type = "colqwen2.5"
elif "colqwen2" in model_choice_lower and "colqwen2-v1.0" in model_choice_lower:
# Handle HuggingFace model names like "vidore/colqwen2-v1.0" (but not colqwen2.5)
model_name = model_choice
model_type = "colqwen2"
elif "colpali" in model_choice_lower:
# Handle HuggingFace model names like "vidore/colpali-v1.2"
model_name = model_choice
model_type = "colpali"
else:
# Default to colpali for backward compatibility
model_name = "vidore/colpali-v1.2"
model_type = "colpali"
# Load model based on type
attn_implementation = (
"flash_attention_2"
if (device_str == "cuda" and is_flash_attn_2_available())
else "eager"
"flash_attention_2" if (device_str == "cuda" and is_flash_attn_2_available()) else "eager"
)
if model_type == "colqwen2.5":
model = ColQwen2_5.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
device_map=device,
attn_implementation=attn_implementation,
).eval()
processor = ColQwen2_5_Processor.from_pretrained(model_name)
elif model_type == "colqwen2":
model = ColQwen2.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,
@@ -173,8 +218,7 @@ def _load_colvision(model_choice: str):
attn_implementation=attn_implementation,
).eval()
processor = ColQwen2Processor.from_pretrained(model_name)
else:
model_name = "vidore/colpali-v1.2"
else: # colpali
model = ColPali.from_pretrained(
model_name,
torch_dtype=torch.bfloat16,

View File

@@ -90,6 +90,51 @@ VIDORE_V1_TASKS = {
},
}
# Task name aliases (short names -> full names)
TASK_ALIASES = {
"arxivqa": "VidoreArxivQARetrieval",
"docvqa": "VidoreDocVQARetrieval",
"infovqa": "VidoreInfoVQARetrieval",
"tabfquad": "VidoreTabfquadRetrieval",
"tatdqa": "VidoreTatdqaRetrieval",
"shiftproject": "VidoreShiftProjectRetrieval",
"syntheticdocqa_ai": "VidoreSyntheticDocQAAIRetrieval",
"syntheticdocqa_energy": "VidoreSyntheticDocQAEnergyRetrieval",
"syntheticdocqa_government": "VidoreSyntheticDocQAGovernmentReportsRetrieval",
"syntheticdocqa_healthcare": "VidoreSyntheticDocQAHealthcareIndustryRetrieval",
}
def normalize_task_name(task_name: str) -> str:
"""Normalize task name (handle aliases)."""
task_name_lower = task_name.lower()
if task_name in VIDORE_V1_TASKS:
return task_name
if task_name_lower in TASK_ALIASES:
return TASK_ALIASES[task_name_lower]
# Try partial match
for alias, full_name in TASK_ALIASES.items():
if alias in task_name_lower or task_name_lower in alias:
return full_name
return task_name
def get_safe_model_name(model_name: str) -> str:
"""Get a safe model name for use in file paths."""
import hashlib
import os
# If it's a path, use basename or hash
if os.path.exists(model_name) and os.path.isdir(model_name):
# Use basename if it's reasonable, otherwise use hash
basename = os.path.basename(model_name.rstrip("/"))
if basename and len(basename) < 100 and not basename.startswith("."):
return basename
# Use hash for very long or problematic paths
return hashlib.md5(model_name.encode()).hexdigest()[:16]
# For HuggingFace model names, replace / with _
return model_name.replace("/", "_").replace(":", "_")
def load_vidore_v1_data(
dataset_path: str,
@@ -181,6 +226,9 @@ def evaluate_task(
print(f"Evaluating task: {task_name}")
print(f"{'=' * 80}")
# Normalize task name (handle aliases)
task_name = normalize_task_name(task_name)
# Get task config
if task_name not in VIDORE_V1_TASKS:
raise ValueError(f"Unknown task: {task_name}. Available: {list(VIDORE_V1_TASKS.keys())}")
@@ -223,11 +271,13 @@ def evaluate_task(
)
# Build or load index
# Use safe model name for index path (different models need different indexes)
safe_model_name = get_safe_model_name(model_name)
index_path_full = index_path if not use_fast_plaid else fast_plaid_index_path
if index_path_full is None:
index_path_full = f"./indexes/{task_name}_{model_name}"
index_path_full = f"./indexes/{task_name}_{safe_model_name}"
if use_fast_plaid:
index_path_full = f"./indexes/{task_name}_{model_name}_fastplaid"
index_path_full = f"./indexes/{task_name}_{safe_model_name}_fastplaid"
index_or_retriever, corpus_ids_ordered = evaluator.build_index_from_corpus(
corpus=corpus,
@@ -281,8 +331,7 @@ def main():
"--model",
type=str,
default="colqwen2",
choices=["colqwen2", "colpali"],
help="Model to use",
help="Model to use: 'colqwen2', 'colpali', or path to a model directory (supports LoRA adapters)",
)
parser.add_argument(
"--task",
@@ -350,11 +399,11 @@ def main():
# Determine tasks to evaluate
if args.task:
tasks_to_eval = [args.task]
tasks_to_eval = [normalize_task_name(args.task)]
elif args.tasks.lower() == "all":
tasks_to_eval = list(VIDORE_V1_TASKS.keys())
else:
tasks_to_eval = [t.strip() for t in args.tasks.split(",")]
tasks_to_eval = [normalize_task_name(t.strip()) for t in args.tasks.split(",")]
print(f"Tasks to evaluate: {tasks_to_eval}")