Skip to content

Commit 8feca23

Browse files
authored
[Fix] of "evaluation of llava_vid on mvbench" (EvolvingLMMs-Lab#541)
* Update README.md * Update README.md * [Fix] of "mvbench missing videos" Modify DATA_LIST so that it can find the corresponding videos * [Fix] of "evaluation of llava_vid on mvbench" "mvbench_video/tvqa/frames_fps3_hq/castle_s07e04_seg02_clip_14" is a directory of sampled video frames, not an original video file, and the current code logic cannot handle this subtask. * Fixing lmms_eval/models/llava_vid.py
1 parent 532ca07 commit 8feca23

File tree

2 files changed

+12
-8
lines changed

2 files changed

+12
-8
lines changed

lmms_eval/models/llava_vid.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import glob
12
import math
23
import os
34
from datetime import timedelta
@@ -416,6 +417,8 @@ def generate_until(self, requests) -> List[str]:
416417
visuals = doc_to_visual(self.task_dict[task][split][doc_id])
417418
# visuals = [visuals]
418419
# visuals = self.flatten(visuals)
420+
if os.path.isdir(visuals[0]):
421+
visuals = glob.glob(visuals[0] + "/*")
419422
videos = []
420423
try:
421424
# for visual in visuals:
@@ -440,7 +443,8 @@ def generate_until(self, requests) -> List[str]:
440443
frame_idx = sampled_indices.tolist()
441444
frame_time = [i / fps for i in frame_idx]
442445
frame_time = ",".join([f"{i:.2f}s" for i in frame_time])
443-
video = [visuals[i] for i in frame_idx]
446+
# video = [visuals[i] for i in frame_idx]
447+
video = np.stack([np.array(Image.open(visuals[i])) for i in frame_idx], axis=0)
444448

445449
video = self._image_processor.preprocess(video, return_tensors="pt")["pixel_values"].cuda()
446450
if self.torch_dtype == "bfloat16":

lmms_eval/tasks/mvbench/utils.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,22 @@
1616
from lmms_eval.tasks._task_utils.file_utils import generate_submission_file
1717

1818
DATA_LIST = {
19-
"object_interaction": "star/Charades_segment",
20-
"action_sequence": "star/Charades_segment",
21-
"action_prediction": "star/Charades_segment",
22-
"action_localization": "sta/sta_video_segment",
19+
"object_interaction": "star/Charades_v1_480",
20+
"action_sequence": "star/Charades_v1_480",
21+
"action_prediction": "star/Charades_v1_480",
22+
"action_localization": "sta_video",
2323
"moving_count": "clevrer/video_validation",
24-
"fine_grained_pose": "nturgbd_convert",
24+
"fine_grained_pose": "nturgbd",
2525
"character_order": "perception/videos",
2626
"object_shuffle": "perception/videos",
2727
"egocentric_navigation": "vlnqa",
2828
"moving_direction": "clevrer/video_validation",
29-
"episodic_reasoning": "tvqa/video_fps3_hq_segment",
29+
"episodic_reasoning": "tvqa/frames_fps3_hq",
3030
"fine_grained_action": "Moments_in_Time_Raw/videos",
3131
"scene_transition": "scene_qa/video",
3232
"state_change": "perception/videos",
3333
"moving_attribute": "clevrer/video_validation",
34-
"action_antonym": "ssv2_video_mp4",
34+
"action_antonym": "ssv2_video",
3535
"unexpected_action": "FunQA_test/test",
3636
"counterfactual_inference": "clevrer/video_validation",
3737
"object_existence": "clevrer/video_validation",

0 commit comments

Comments
 (0)