File tree Expand file tree Collapse file tree 1 file changed +2
-3
lines changed
lmms_eval/tasks/vl_rewardbench Expand file tree Collapse file tree 1 file changed +2
-3
lines changed Original file line number Diff line number Diff line change 77import requests
88from loguru import logger as eval_logger
99
10-
1110LLM_PARSE_ANSWER_PROMPT = """
1211You are given a pairwise judgement for two responses. Please return the better response according to the judgement.
1312Return the Answer X ONLY. e.g., Answer 1 or Answer 2.
@@ -102,9 +101,9 @@ def vlrewardbench_process_results(doc, results):
102101 a dictionary with key: metric name (in this case mme score), value: metric value
103102 """
104103 pred = results [0 ]
105- pred_ans = parse_pred_ans (pred ) # 1 or 2 indicates which one is better
104+ pred_ans = parse_pred_ans (pred ) # 1 or 2 indicates which one is better
106105 random_number = sum (len (res ) for res in doc ["response" ]) % 2 # we use the length sum % 2 as a random number generator to decide the order of the answers
107- # Note: human_ranking [0, 1] -> answer 1 is better, [1, 0] -> answer 2 is better
106+ # Note: human_ranking [0, 1] -> answer 1 is better, [1, 0] -> answer 2 is better
108107 gt_ans = doc ["human_ranking" ].index (0 if random_number == 0 else 1 ) + 1
109108
110109 if pred_ans == gt_ans :
You can’t perform that action at this time.
0 commit comments