Skip to content

Commit 581a672

Browse files
yinanhe and heyinan authored
[Fix] Fix cache_dir issue where MVBench cannot be found (#306)
* [add] add internvideo2 support && change mvbench to video branch
* [add] answer_prompt of internvideo2
* [add] change video type of internvideo2
* [fix] update template of mvbench
* [reformat]
* [fix] generate_until_multi_round
* [Feat] videochat2 support
* [feat] Link cache_path to cache_dir if no unzip or untar
* [feat] new variable in dataset kwargs: create_link

---------

Co-authored-by: heyinan <[email protected]>
1 parent 99fcd39 commit 581a672

File tree

2 files changed: +15 additions, -0 deletions (15 lines changed)

lmms_eval/api/task.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -938,6 +938,7 @@ def _download_from_youtube(path):
938938
force_download = dataset_kwargs.get("force_download", False)
939939
force_unzip = dataset_kwargs.get("force_unzip", False)
940940
revision = dataset_kwargs.get("revision", "main")
941+
create_link = dataset_kwargs.get("create_link", False)
941942
cache_path = snapshot_download(repo_id=self.DATASET_PATH, revision=revision, repo_type="dataset", force_download=force_download, etag_timeout=60)
942943
zip_files = glob(os.path.join(cache_path, "**/*.zip"), recursive=True)
943944
tar_files = glob(os.path.join(cache_path, "**/*.tar*"), recursive=True)
@@ -1001,6 +1002,16 @@ def concat_tar_parts(tar_parts, output_tar):
10011002
if not os.path.exists(os.path.join(cache_dir, os.path.basename(base_name))):
10021003
untar_video_data(output_tar)
10031004

1005+
# Link cache_path to cache_dir if needed.
1006+
if create_link:
1007+
if not os.path.exists(cache_dir) or os.path.islink(cache_dir):
1008+
if os.path.islink(cache_dir):
1009+
os.remove(cache_dir)
1010+
eval_logger.info(f"Removed existing symbolic link: {cache_dir}")
1011+
# Create a new symbolic link
1012+
os.symlink(cache_path, cache_dir)
1013+
eval_logger.info(f"Symbolic link created successfully: {cache_path} -> {cache_dir}")
1014+
10041015
accelerator.wait_for_everyone()
10051016
dataset_kwargs.pop("cache_dir")
10061017
dataset_kwargs.pop("video")
@@ -1019,6 +1030,9 @@ def concat_tar_parts(tar_parts, output_tar):
10191030
if "local_files_only" in dataset_kwargs:
10201031
dataset_kwargs.pop("local_files_only")
10211032

1033+
if "create_link" in dataset_kwargs:
1034+
dataset_kwargs.pop("create_link")
1035+
10221036
self.dataset = datasets.load_dataset(
10231037
path=self.DATASET_PATH,
10241038
name=self.DATASET_NAME,

lmms_eval/tasks/mvbench/_default_template_yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ dataset_kwargs:
44
cache_dir: mvbench_video
55
video: True
66
revision: video
7+
create_link: True
78
generation_kwargs:
89
max_new_tokens: 16
910
temperature: 0

0 commit comments

Comments
 (0)