diff --git a/lmms_eval/models/vllm.py b/lmms_eval/models/vllm.py index 3b125718f..b361bdcbf 100644 --- a/lmms_eval/models/vllm.py +++ b/lmms_eval/models/vllm.py @@ -54,6 +54,13 @@ def __init__( self.max_frame_num = max_frame_num self.threads = threads + init_params = ["model_version", "tensor_parallel_size", "gpu_memory_utilization", "batch_size", "timeout", "max_images", "max_videos", "max_audios", "max_frame_num", "threads", "trust_remote_code"] + + # filter out the parameters already defined in __init__ to pass options to VLLM + # this enables support for all VLLM Engine args: + # https://github.com/vllm-project/vllm/blob/3147586ebdb36ceae653e9dceec8cf9922fe2c28/vllm/engine/arg_utils.py#L93 + filtered_kwargs = {k: v for k, v in kwargs.items() if k not in init_params} + accelerator = Accelerator() self.client = LLM( model=self.model_version, @@ -61,6 +68,7 @@ def __init__( gpu_memory_utilization=gpu_memory_utilization, limit_mm_per_prompt={"image": max_images, "video": max_videos, "audio": max_audios}, trust_remote_code=trust_remote_code, + **filtered_kwargs, ) if accelerator.num_processes > 1: assert accelerator.distributed_type in [DistributedType.FSDP, DistributedType.MULTI_GPU, DistributedType.DEEPSPEED], "Unsupported distributed type provided. Only DDP and FSDP are supported."