Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Group config: running `livexiv_tqa` evaluates all six dataset snapshots below.
group: livexiv_tqa
task:
- livexiv_tqa_v1
- livexiv_tqa_v2
- livexiv_tqa_v3
- livexiv_tqa_v4
- livexiv_tqa_v5
- livexiv_tqa_v6

26 changes: 26 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Shared template for the LiveXiv TQA tasks; each livexiv_tqa_vN.yaml includes
# this file and overrides `dataset_name` with its snapshot.
# NOTE: nested keys (dataset_kwargs.token, generation_kwargs.until, the
# metric_list entry) are indented here — the flat layout in the pasted diff
# would not parse as the intended YAML structure.
dataset_path: LiveXiv/LiveXiv
dataset_kwargs:
  token: True  # dataset is gated on the Hub; requires an HF auth token
test_split: test
dataset_name: TQA-2024-09-21
output_type: generate_until
doc_to_visual: !function utils.livexiv_doc_to_visual
doc_to_text: !function utils.livexiv_doc_to_text
doc_to_target: "answer"
generation_kwargs:
  until:
    - "ASSISTANT:"
  image_aspect_ratio: original
process_results: !function utils.livexiv_process_result
process_results_use_image: true
metric_list:
  - metric: livexiv_tqa
    aggregation: !function utils.livexiv_aggregation_result
    higher_is_better: true
metadata:
  - version: 0.0

lmms_eval_specific_kwargs:
  default:
    pre_prompt: ""
    post_prompt: ""
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv TQA snapshot v1 (2024-09-21); all other settings come from the template.
task: "livexiv_tqa_v1"
dataset_name: "TQA-2024-09-21"
include: livexiv_tqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv TQA snapshot v2 (2024-10-26); all other settings come from the template.
task: "livexiv_tqa_v2"
dataset_name: "TQA-2024-10-26"
include: livexiv_tqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv TQA snapshot v3; all other settings come from the template.
task: "livexiv_tqa_v3"
dataset_name: "v3-TQA"
include: livexiv_tqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv TQA snapshot v4; all other settings come from the template.
task: "livexiv_tqa_v4"
dataset_name: "v4-TQA"
include: livexiv_tqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv TQA snapshot v5; all other settings come from the template.
task: "livexiv_tqa_v5"
dataset_name: "v5-TQA"
include: livexiv_tqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/livexiv_tqa_v6.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv TQA snapshot v6; all other settings come from the template.
task: "livexiv_tqa_v6"
dataset_name: "v6-TQA"
include: livexiv_tqa_template_yaml
74 changes: 74 additions & 0 deletions lmms_eval/tasks/livexiv_tqa/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import json
import re


def extract_answer(text):
    """Return the first capital letter in *text* that is not at index 0, or None.

    Intended to pull an option letter out of verbose output such as
    "Answer: B"; excluding index 0 skips a capitalised leading word.
    """
    for found in re.finditer(r"[A-Z]", text):
        if found.start() != 0:
            return found.group(0)
    return None


def livexiv_doc_to_visual(doc):
    """Return the document's image, converted to RGB, as a one-element list."""
    image = doc["image"]
    return [image.convert("RGB")]


def livexiv_doc_to_text(doc, model_specific_kwargs=None):
    """Format a multiple-choice question with options A-D and an answer cue.

    ``model_specific_kwargs`` is accepted for framework compatibility but not
    used here (the task configs set empty pre/post prompts).
    """
    parts = [
        doc["question"],
        f"A. {doc['option_a']}",
        f"B. {doc['option_b']}",
        f"C. {doc['option_c']}",
        f"D. {doc['option_d']}",
        "Answer with the option's letter from the given choices directly.",
    ]
    return "\n".join(parts)


def livexiv_process_result(doc, result):
    """Reduce the model's first generation to an option letter for scoring.

    Multi-character outputs containing the word "answer" are parsed with
    extract_answer (which may return None); other multi-character outputs
    contribute their first character. Ground truth is read from doc["gt"].
    """
    prediction = result[0].strip()
    if len(prediction) > 1:
        if "answer" in prediction.lower():
            prediction = extract_answer(prediction)
        else:
            prediction = prediction[0]
    return {"livexiv_tqa": {"pred": prediction, "answer": doc["gt"]}}


def livexiv_aggregation_result(results):
    """Compute accuracy over per-sample {"pred": ..., "answer": ...} dicts.

    Comparison is case-insensitive and whitespace-trimmed. Samples whose
    prediction cannot be compared (e.g. pred is None because no option letter
    was extracted) are counted as incorrect rather than raising. Returns 0.0
    for an empty results list (the original divided by zero there).
    """
    if not results:
        return 0.0
    total_correct = 0
    for result in results:
        try:
            if result["pred"].lower().strip() == result["answer"].lower().strip():
                total_correct += 1
        except Exception as e:
            # Best-effort scoring: a None/non-string pred just scores 0 for
            # that sample; keep the print so failures remain visible in logs.
            print(e)
    return total_correct / len(results)


def livexiv_aggregation_result_all(results):
    """Compute accuracy and also dump predictions to a submission JSON file.

    NOTE(review): entries are keyed on result["question_id"], but
    livexiv_process_result stores only "pred" and "answer" — confirm the
    field exists before wiring this aggregator into a task config.
    """
    score = livexiv_aggregation_result(results)
    stored_results = [
        {"question_id": result["question_id"], "prediction": result["pred"]}
        for result in results
    ]
    with open("./livexiv_tqa_submission.json", "w") as f:
        json.dump(stored_results, f, indent=4)
    print("Storing files for LiveXiv-TQA submission ...")
    return score


def livexiv_doc_to_text_mc(doc):
    """Return the bare question followed by an "Answer :" cue (MC/loglikelihood form)."""
    return "{} Answer :".format(doc["question"])


def livexiv_doc_to_choice(doc):
    """Return the four option texts in A-D order."""
    return [doc[key] for key in ("option_a", "option_b", "option_c", "option_d")]


def livexiv_doc_to_mc_target(doc):
    """Map the gold letter in doc["answer"] ("A"-"D") to its option text.

    Raises KeyError when the letter is outside A-D.
    """
    letter_to_field = {"A": "option_a", "B": "option_b", "C": "option_c", "D": "option_d"}
    field = letter_to_field[doc["answer"]]
    return doc[field]
9 changes: 9 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Group config: running `livexiv_vqa` evaluates all six dataset snapshots below.
group: livexiv_vqa
task:
- livexiv_vqa_v1
- livexiv_vqa_v2
- livexiv_vqa_v3
- livexiv_vqa_v4
- livexiv_vqa_v5
- livexiv_vqa_v6

26 changes: 26 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa_template_yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Shared template for the LiveXiv VQA tasks; each livexiv_vqa_vN.yaml includes
# this file and overrides `dataset_name` with its snapshot.
# NOTE: nested keys (dataset_kwargs.token, generation_kwargs.until, the
# metric_list entry) are indented here — the flat layout in the pasted diff
# would not parse as the intended YAML structure.
dataset_path: LiveXiv/LiveXiv
dataset_kwargs:
  token: True  # dataset is gated on the Hub; requires an HF auth token
test_split: test
dataset_name: VQA-2024-09-21
output_type: generate_until
doc_to_visual: !function utils.livexiv_doc_to_visual
doc_to_text: !function utils.livexiv_doc_to_text
doc_to_target: "answer"
generation_kwargs:
  until:
    - "ASSISTANT:"
  image_aspect_ratio: original
process_results: !function utils.livexiv_process_result
process_results_use_image: true
metric_list:
  - metric: livexiv_vqa
    aggregation: !function utils.livexiv_aggregation_result
    higher_is_better: true
metadata:
  - version: 0.0

lmms_eval_specific_kwargs:
  default:
    pre_prompt: ""
    post_prompt: ""
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv VQA snapshot v1 (2024-09-21); all other settings come from the template.
task: "livexiv_vqa_v1"
dataset_name: "VQA-2024-09-21"
include: livexiv_vqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv VQA snapshot v2 (2024-10-26); all other settings come from the template.
task: "livexiv_vqa_v2"
dataset_name: "VQA-2024-10-26"
include: livexiv_vqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v3.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv VQA snapshot v3; all other settings come from the template.
task: "livexiv_vqa_v3"
dataset_name: "v3-VQA"
include: livexiv_vqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v4.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv VQA snapshot v4; all other settings come from the template.
task: "livexiv_vqa_v4"
dataset_name: "v4-VQA"
include: livexiv_vqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v5.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv VQA snapshot v5; all other settings come from the template.
task: "livexiv_vqa_v5"
dataset_name: "v5-VQA"
include: livexiv_vqa_template_yaml
3 changes: 3 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/livexiv_vqa_v6.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# LiveXiv VQA snapshot v6; all other settings come from the template.
task: "livexiv_vqa_v6"
dataset_name: "v6-VQA"
include: livexiv_vqa_template_yaml
74 changes: 74 additions & 0 deletions lmms_eval/tasks/livexiv_vqa/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import json
import re


def extract_answer(text):
    """Return the first capital letter in *text* that is not at index 0, or None.

    Intended to pull an option letter out of verbose output such as
    "Answer: B"; excluding index 0 skips a capitalised leading word.
    """
    for found in re.finditer(r"[A-Z]", text):
        if found.start() != 0:
            return found.group(0)
    return None


def livexiv_doc_to_visual(doc):
    """Return the document's image, converted to RGB, as a one-element list."""
    image = doc["image"]
    return [image.convert("RGB")]


def livexiv_doc_to_text(doc, model_specific_kwargs=None):
    """Format a multiple-choice question with options A-D and an answer cue.

    ``model_specific_kwargs`` is accepted for framework compatibility but not
    used here (the task configs set empty pre/post prompts).
    """
    parts = [
        doc["question"],
        f"A. {doc['option_a']}",
        f"B. {doc['option_b']}",
        f"C. {doc['option_c']}",
        f"D. {doc['option_d']}",
        "Answer with the option's letter from the given choices directly.",
    ]
    return "\n".join(parts)


def livexiv_process_result(doc, result):
    """Reduce the model's first generation to an option letter for scoring.

    Multi-character outputs containing the word "answer" are parsed with
    extract_answer (which may return None); other multi-character outputs
    contribute their first character. Ground truth is read from doc["gt"].
    """
    prediction = result[0].strip()
    if len(prediction) > 1:
        if "answer" in prediction.lower():
            prediction = extract_answer(prediction)
        else:
            prediction = prediction[0]
    return {"livexiv_vqa": {"pred": prediction, "answer": doc["gt"]}}


def livexiv_aggregation_result(results):
    """Compute accuracy over per-sample {"pred": ..., "answer": ...} dicts.

    Comparison is case-insensitive and whitespace-trimmed. Samples whose
    prediction cannot be compared (e.g. pred is None because no option letter
    was extracted) are counted as incorrect rather than raising. Returns 0.0
    for an empty results list (the original divided by zero there).
    """
    if not results:
        return 0.0
    total_correct = 0
    for result in results:
        try:
            if result["pred"].lower().strip() == result["answer"].lower().strip():
                total_correct += 1
        except Exception as e:
            # Best-effort scoring: a None/non-string pred just scores 0 for
            # that sample; keep the print so failures remain visible in logs.
            print(e)
    return total_correct / len(results)


def livexiv_aggregation_result_all(results):
    """Compute accuracy and also dump predictions to a submission JSON file.

    NOTE(review): entries are keyed on result["question_id"], but
    livexiv_process_result stores only "pred" and "answer" — confirm the
    field exists before wiring this aggregator into a task config.
    """
    score = livexiv_aggregation_result(results)
    stored_results = [
        {"question_id": result["question_id"], "prediction": result["pred"]}
        for result in results
    ]
    with open("./livexiv_vqa_submission.json", "w") as f:
        json.dump(stored_results, f, indent=4)
    print("Storing files for LiveXiv-VQA submission ...")
    return score


def livexiv_doc_to_text_mc(doc):
    """Return the bare question followed by an "Answer :" cue (MC/loglikelihood form)."""
    return "{} Answer :".format(doc["question"])


def livexiv_doc_to_choice(doc):
    """Return the four option texts in A-D order."""
    return [doc[key] for key in ("option_a", "option_b", "option_c", "option_d")]


def livexiv_doc_to_mc_target(doc):
    """Map the gold letter in doc["answer"] ("A"-"D") to its option text.

    Raises KeyError when the letter is outside A-D.
    """
    letter_to_field = {"A": "option_a", "B": "option_b", "C": "option_c", "D": "option_d"}
    field = letter_to_field[doc["answer"]]
    return doc[field]