95 changes: 84 additions & 11 deletions examples/models/bagel.sh
@@ -1,9 +1,9 @@
#!/bin/bash

# Bagel Model Evaluation Script
# Bagel Model Evaluation Script for GEdit-Bench
#
# This script demonstrates how to run lmms-eval with the Bagel multimodal model
# for text-to-image generation tasks.
# for image editing tasks using GEdit-Bench.
#
# Prerequisites:
# 1. Clone Bagel repository at lmms-eval root:
@@ -14,19 +14,92 @@
# Download from https://huggingface.co/ByteDance-Seed/BAGEL-7B-MoT
#
# Usage:
# bash examples/models/bagel.sh
# # Use local Qwen2.5-VL for evaluation:
# bash examples/models/bagel.sh qwen25vl
#
# # Use vLLM remote Qwen for evaluation:
# bash examples/models/bagel.sh vllm_qwen
#
# # Use GPT-4o for evaluation:
# bash examples/models/bagel.sh gpt4o

# Activate conda environment (uncomment and modify if needed)
# source miniconda3/etc/profile.d/conda.sh
# conda activate lmms-eval

# ============================================
# Configuration
# ============================================

MODEL_PATH=/your/path/to/models/BAGEL-7B-MoT
TASK=gedit_bench

# GEdit-Bench environment variables
export GEDIT_BENCH_MODEL_NAME="bagel"
export GEDIT_BENCH_OUTPUT_DIR="./logs/bagel_persistent_folder/bagel_generated_images"
export GEDIT_BENCH_VIE_KEY_PATH="./lmms_eval/tasks/gedit_bench/secret.env"

# Set model path - should point to the model weights directory
# Can be absolute path or relative path
MODEL_PATH=$1
export GOOGLE_API_KEY=<YOUR_GOOGLE_API_KEY>
TASK=$2
# ============================================
# Evaluation Backend Selection
# ============================================

# Run evaluation with BFloat16 (default, full precision)
accelerate launch -m lmms_eval \
# Get backend from command line argument, default to "vllm_qwen25vl"
EVAL_BACKBONE=${1:-vllm_qwen25vl}

if [ "$EVAL_BACKBONE" == "vllm_qwen" ] || [ "$EVAL_BACKBONE" == "vllm_qwen25vl" ] || [ "$EVAL_BACKBONE" == "vllm_qwen3vl" ]; then
echo "Using vLLM Qwen for VIEScore evaluation..."
export GEDIT_BENCH_VIE_BACKBONE="$EVAL_BACKBONE"
# vLLM API settings - modify these for your setup
export VLLM_API_BASE="${VLLM_API_BASE:-http://localhost:8000/v1}"
export VLLM_API_KEY="${VLLM_API_KEY:-EMPTY}"
export VLLM_MODEL_NAME="${VLLM_MODEL_NAME:-Qwen/Qwen2.5-VL-72B-Instruct-AWQ}"
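# A matching server can be started with vLLM's OpenAI-compatible endpoint, e.g.
# (assuming vLLM is installed; the model name above is only an example):
#   vllm serve Qwen/Qwen2.5-VL-72B-Instruct-AWQ --port 8000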
echo " VLLM_API_BASE: $VLLM_API_BASE"
echo " VLLM_MODEL_NAME: $VLLM_MODEL_NAME"
elif [ "$EVAL_BACKBONE" == "gpt4o" ]; then
echo "Using GPT-4o for VIEScore evaluation..."
export GEDIT_BENCH_VIE_BACKBONE="gpt4o"
# Set your OpenAI API key
# export OPENAI_API_KEY="your-api-key-here"
else
echo "Using local Qwen2.5-VL for VIEScore evaluation..."
export GEDIT_BENCH_VIE_BACKBONE="qwen25vl"
fi

# ============================================
# Run Evaluation
# ============================================

echo "============================================"
echo "Starting GEdit-Bench evaluation..."
echo "============================================"
echo " Model: Bagel"
echo " Model Path: $MODEL_PATH"
echo " Evaluation Backend: $GEDIT_BENCH_VIE_BACKBONE"
echo " Output Directory: $GEDIT_BENCH_OUTPUT_DIR"
echo "============================================"
echo ""

# Image editing task (GEdit-Bench)
# task_mode=edit: input image + edit instruction -> edited image
accelerate launch -m lmms_eval \
--model bagel \
--model_args pretrained=${MODEL_PATH},mode=1 \
--model_args pretrained=${MODEL_PATH},task_mode=edit \
--tasks $TASK \
--batch_size 1 \
--log_samples \
--output_path ./logs/

echo ""
echo "============================================"
echo "Evaluation complete!"
echo "============================================"

# For text-to-image generation tasks, use task_mode=generate:
# accelerate launch -m lmms_eval \
# --model bagel \
# --model_args pretrained=${MODEL_PATH},task_mode=generate \
# --tasks ueval \
# --batch_size 1 \
# --log_samples \
# --output_path ./logs/
10 changes: 9 additions & 1 deletion lmms_eval/api/task.py
@@ -1057,13 +1057,21 @@ def concat_tar_parts(tar_parts, output_tar):
**dataset_kwargs if dataset_kwargs is not None else {},
)

# Ensure dataset is a DatasetDict so downstream logic that expects multiple splits works.
if not isinstance(self.dataset, datasets.DatasetDict):
split_name = self.config.test_split or self.config.validation_split or self.config.training_split or "train"
self.dataset = datasets.DatasetDict({split_name: self.dataset})

Comment on lines +1060 to +1064 (Collaborator):
This is too ground-breaking a change; it might break too much.
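For context, a minimal standalone sketch of the wrapping behavior these lines add (toy data; split and column names are illustrative):

    import datasets

    # A bare Dataset, as load_dataset() can return when a single split is requested.
    ds = datasets.Dataset.from_dict({"question": ["a", "b"]})

    # The added code wraps it so downstream split-keyed access keeps working.
    if not isinstance(ds, datasets.DatasetDict):
        ds = datasets.DatasetDict({"train": ds})

    print(list(ds.keys()))  # ['train']
    print(ds["train"][0])   # {'question': 'a'}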

if self.config.process_docs is not None:
for split in self.dataset:
if split in [self.config.training_split, self.config.validation_split, self.config.test_split, self.config.fewshot_split]:
self.dataset[split] = self.config.process_docs(self.dataset[split])

# copy dataset, remove image features
self.dataset_no_image = self.dataset.copy()
try:
self.dataset_no_image = self.dataset.copy()
except AttributeError:
self.dataset_no_image = datasets.DatasetDict({k: v for k, v in self.dataset.items()})
Comment on lines +1071 to +1074 (Collaborator):

This might also be a breaking change; consider not using it.
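For context, a minimal sketch of what the fallback relies on (toy data): DatasetDict is a dict subclass, so copy() is inherited, while a bare Dataset defines no copy() method, which is what the except AttributeError branch guards against:

    import datasets

    dd = datasets.DatasetDict({"train": datasets.Dataset.from_dict({"x": [1]})})
    copied = dd.copy()  # inherited from dict: shallow copy of split -> Dataset

    bare = datasets.Dataset.from_dict({"x": [1]})
    try:
        bare.copy()  # Dataset has no copy(); raises AttributeError
    except AttributeError:
        # ...so the patch rebuilds a DatasetDict from the split mapping instead.
        rebuilt = datasets.DatasetDict({"train": bare})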

for doc_name in self.dataset_no_image:
remove_cols = []
features = self.dataset_no_image[doc_name].features