diff --git a/lmms_eval/models/vllm.py b/lmms_eval/models/vllm.py
index 3b125718f..b361bdcbf 100644
--- a/lmms_eval/models/vllm.py
+++ b/lmms_eval/models/vllm.py
@@ -54,6 +54,13 @@ def __init__(
         self.max_frame_num = max_frame_num
         self.threads = threads
 
+        init_params = ["model_version", "tensor_parallel_size", "gpu_memory_utilization", "batch_size", "timeout", "max_images", "max_videos", "max_audios", "max_frame_num", "threads", "trust_remote_code"]
+
+        # Drop any kwargs that duplicate this __init__'s own parameters, then forward the rest to vLLM.
+        # This enables support for all vLLM engine args:
+        # https://github.com/vllm-project/vllm/blob/3147586ebdb36ceae653e9dceec8cf9922fe2c28/vllm/engine/arg_utils.py#L93
+        filtered_kwargs = {k: v for k, v in kwargs.items() if k not in init_params}
+
         accelerator = Accelerator()
         self.client = LLM(
             model=self.model_version,
@@ -61,6 +68,7 @@ def __init__(
             gpu_memory_utilization=gpu_memory_utilization,
             limit_mm_per_prompt={"image": max_images, "video": max_videos, "audio": max_audios},
             trust_remote_code=trust_remote_code,
+            **filtered_kwargs,
         )
         if accelerator.num_processes > 1:
             assert accelerator.distributed_type in [DistributedType.FSDP, DistributedType.MULTI_GPU, DistributedType.DEEPSPEED], "Unsupported distributed type provided. Only DDP and FSDP are supported."