4 changes: 1 addition & 3 deletions lmms_eval/__main__.py

@@ -365,7 +365,7 @@ def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) -> None:
 
     if args.include_path is not None:
         eval_logger.info(f"Including path: {args.include_path}")
-    task_manager = TaskManager(args.verbosity, include_path=args.include_path)
+    task_manager = TaskManager(args.verbosity, include_path=args.include_path, model_name=args.model)
 
     # update the evaluation tracker args with the output path and the HF token
     if args.output_path:
@@ -392,8 +392,6 @@ def cli_evaluate_single(args: Union[argparse.Namespace, None] = None) -> None:
     if args.include_path is not None:
         eval_logger.info(f"Including path: {args.include_path}")
 
-    task_manager = TaskManager(args.verbosity, include_path=args.include_path)
-
     if "push_samples_to_hub" in evaluation_tracker_args and not args.log_samples:
         eval_logger.warning("Pushing samples to the Hub requires --log_samples to be set. Samples will not be pushed to the Hub.")
 
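The two hunks consolidate the duplicated TaskManager construction and thread the selected model into it. As a hedged illustration of why model_name matters there (hypothetical names and structure, not the actual lmms-eval internals), a manager that knows the model can prefer per-model config blocks over defaults when resolving a task:

# Illustrative sketch only; resolve_task_config and its merge rule are
# invented for this example, not the real lmms-eval TaskManager body.
from typing import Optional


class TaskManager:
    def __init__(self, verbosity: str = "INFO", include_path: Optional[str] = None, model_name: Optional[str] = None) -> None:
        self.verbosity = verbosity
        self.include_path = include_path
        self.model_name = model_name  # kept so task lookup can honor per-model overrides

    def resolve_task_config(self, task_config: dict) -> dict:
        # Hypothetical resolution rule: if the task config carries per-model
        # kwargs, merge in the entry matching the current model, otherwise
        # fall back to its "default" entry.
        overrides = task_config.get("lmms_eval_specific_kwargs", {})
        chosen = overrides.get(self.model_name, overrides.get("default", {}))
        return {**task_config, **chosen}

It is constructed once at the CLI entry point, exactly as in the first hunk: TaskManager(args.verbosity, include_path=args.include_path, model_name=args.model).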
7 changes: 6 additions & 1 deletion lmms_eval/models/claude.py

@@ -232,8 +232,10 @@ def generate_until(self, requests) -> List[str]:
                 gen_kwargs["num_beams"] = 1
 
             for attempt in range(5):
+                retry_flag = True
                 try:
                     message = client.messages.create(model=self.model_version, max_tokens=gen_kwargs["max_new_tokens"], system=self.system_prompt, temperature=gen_kwargs["temperature"], top_p=gen_kwargs["top_p"], messages=messages)
+                    retry_flag = False
                 except Exception as e:
                     eval_logger.info(f"Attempt {attempt + 1} failed with error: {str(e)}")
                     if attempt < 5 - 1:  # If we have retries left, sleep and then continue to next attempt
@@ -243,6 +245,9 @@ def generate_until(self, requests) -> List[str]:
                         res.append("")
                         pbar.update(1)
                         continue
+                if not retry_flag:
+                    break
+                eval_logger.info("Retrying...")
 
             response_text = message.content[0].text
             res.append(message.content[0].text)
@@ -254,7 +259,7 @@ def generate_until(self, requests) -> List[str]:
                 doc_uuid = f"{task}___{split}___{doc_id}"
                 self.response_cache[doc_uuid] = response_text
                 with open(self.response_persistent_file, "w") as f:
-                    json.dump(self.response_cache, f)
+                    json.dump(self.response_cache, f, indent=4)
 
         pbar.close()
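The added flag marks each attempt as failed up front and is cleared only when client.messages.create returns, so the loop can break on the first success instead of running all five attempts. A minimal self-contained sketch of the same pattern (a hypothetical helper, not the lmms-eval API):

import time

MAX_ATTEMPTS = 5


def call_with_retries(send_request):
    # Retry-flag pattern from the hunk above: assume failure before the
    # call, clear the flag only if the call returns without raising, and
    # break out of the loop on the first success.
    message = None
    for attempt in range(MAX_ATTEMPTS):
        retry_flag = True
        try:
            message = send_request()
            retry_flag = False
        except Exception as e:
            print(f"Attempt {attempt + 1} failed with error: {e}")
            if attempt < MAX_ATTEMPTS - 1:
                time.sleep(2 ** attempt)  # back off, then try the next attempt
                continue
            return None  # out of retries; the model code appends "" instead
        if not retry_flag:
            break  # success: stop retrying
    return message

The indent=4 change in the last hunk is orthogonal: it pretty-prints the persisted response cache so the file is human-readable and diffable, at a small size cost.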
42 changes: 21 additions & 21 deletions lmms_eval/tasks/mix_evals/utils.py

@@ -130,15 +130,15 @@ def mix_evals_video2text_doc_to_visual(doc):
 
 
 # This is the place where you format your question
-def mix_evals_video2text_doc_to_text(doc, model_specific_prompt_kwargs=None):
-    if model_specific_prompt_kwargs is None:
-        model_specific_prompt_kwargs = {}
+def mix_evals_video2text_doc_to_text(doc, lmms_eval_specific_kwargs=None):
+    if lmms_eval_specific_kwargs is None:
+        lmms_eval_specific_kwargs = {}
     pre_prompt = ""
     post_prompt = ""
-    if "pre_prompt" in model_specific_prompt_kwargs:
-        pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
-    if "post_prompt" in model_specific_prompt_kwargs:
-        post_prompt = model_specific_prompt_kwargs["post_prompt"]
+    if "pre_prompt" in lmms_eval_specific_kwargs:
+        pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    if "post_prompt" in lmms_eval_specific_kwargs:
+        post_prompt = lmms_eval_specific_kwargs["post_prompt"]
 
     user_prompt = doc["prompt"]
 
@@ -166,15 +166,15 @@ def mix_evals_video2text_doc_to_text(doc, model_specific_prompt_kwargs=None):
 """
 
 
-def mix_evals_video2text_doc_to_text_open_convs(doc, model_specific_prompt_kwargs=None):
-    if model_specific_prompt_kwargs is None:
-        model_specific_prompt_kwargs = {}
+def mix_evals_video2text_doc_to_text_open_convs(doc, lmms_eval_specific_kwargs=None):
+    if lmms_eval_specific_kwargs is None:
+        lmms_eval_specific_kwargs = {}
     pre_prompt = ""
     post_prompt = ""
-    if "pre_prompt" in model_specific_prompt_kwargs:
-        pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
-    if "post_prompt" in model_specific_prompt_kwargs:
-        post_prompt = model_specific_prompt_kwargs["post_prompt"]
+    if "pre_prompt" in lmms_eval_specific_kwargs:
+        pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    if "post_prompt" in lmms_eval_specific_kwargs:
+        post_prompt = lmms_eval_specific_kwargs["post_prompt"]
 
     filtered_first_turn = re.sub(r"<video_[0-9]+>", "", doc["first_turn_user_prompt"])
     return OPEN_CONVS_PROMPT.format(
@@ -192,15 +192,15 @@ def mix_evals_video2text_doc_to_text_open_convs(doc, model_specific_prompt_kwargs=None):
 """
 
 
-def mix_evals_video2text_doc_to_text_open_2nd_convs(doc, model_specific_prompt_kwargs=None):
-    if model_specific_prompt_kwargs is None:
-        model_specific_prompt_kwargs = {}
+def mix_evals_video2text_doc_to_text_open_2nd_convs(doc, lmms_eval_specific_kwargs=None):
+    if lmms_eval_specific_kwargs is None:
+        lmms_eval_specific_kwargs = {}
     pre_prompt = ""
     post_prompt = ""
-    if "pre_prompt" in model_specific_prompt_kwargs:
-        pre_prompt = model_specific_prompt_kwargs["pre_prompt"]
-    if "post_prompt" in model_specific_prompt_kwargs:
-        post_prompt = model_specific_prompt_kwargs["post_prompt"]
+    if "pre_prompt" in lmms_eval_specific_kwargs:
+        pre_prompt = lmms_eval_specific_kwargs["pre_prompt"]
+    if "post_prompt" in lmms_eval_specific_kwargs:
+        post_prompt = lmms_eval_specific_kwargs["post_prompt"]
 
     return MODEL_CONVS_PROMPT.format(
         PRE=pre_prompt,
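The rename from model_specific_prompt_kwargs to lmms_eval_specific_kwargs is mechanical, but the argument name is part of the task interface, so task configs and callers must pass the block under the new name. A usage sketch, with the doc contents and prompt strings invented for illustration:

# Invented example inputs; only the key names mirror the functions above.
doc = {"prompt": "What is happening in the video?"}
lmms_eval_specific_kwargs = {
    "pre_prompt": "These are frames sampled from a video.\n",
    "post_prompt": "\nAnswer with a short phrase.",
}

text = mix_evals_video2text_doc_to_text(doc, lmms_eval_specific_kwargs)
# Both keys are optional: each function falls back to empty pre/post
# strings when a key, or the whole kwargs argument, is missing.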