|
1 | 1 | # /// script |
2 | 2 | # dependencies = [ |
3 | | -# "optimum-benchmark[openvino]==0.7", |
4 | | -# "transformers==4.53", |
| 3 | +# "optimum-benchmark[openvino]@git+https://github.com/huggingface/optimum-benchmark.git@main", |
| 4 | +# "optimum-intel@git+https://github.com/huggingface/optimum-intel.git@main", |
| 5 | +# "transformers==4.55", |
5 | 6 | # "torchvision", |
6 | 7 | # "num2words", |
7 | 8 | # ] |
8 | 9 | # /// |
9 | 10 |
|
10 | | -from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, OpenVINOConfig, ProcessConfig, PyTorchConfig |
11 | | -from optimum_benchmark.logging_utils import setup_logging |
12 | | - |
13 | | -setup_logging(level="INFO", to_file=True, prefix="OPTIMUM-BENCHMARK") |
| 11 | +import matplotlib.pyplot as plt |
14 | 12 |
|
| 13 | +from optimum_benchmark import ( |
| 14 | + Benchmark, |
| 15 | + BenchmarkConfig, |
| 16 | + BenchmarkReport, |
| 17 | + InferenceConfig, |
| 18 | + OpenVINOConfig, |
| 19 | + ProcessConfig, |
| 20 | + PyTorchConfig, |
| 21 | +) |
| 22 | +from optimum_benchmark.logging_utils import setup_logging |
15 | 23 |
|
if __name__ == "__main__":
    # Log to stdout and to a file; prefix every record for easy grepping.
    setup_logging(level="INFO", to_file=True, prefix="OPTIMUM-BENCHMARK")

    model = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"

    # Each benchmark runs in its own isolated subprocess.
    launcher_config = ProcessConfig()

    # Inference scenario: track memory and latency while generating exactly
    # 16 new tokens (min == max pins the decode length) from a batch of one
    # 16-token prompt with a single image.
    scenario_config = InferenceConfig(
        memory=True,
        latency=True,
        generate_kwargs={"max_new_tokens": 16, "min_new_tokens": 16},
        input_shapes={"batch_size": 1, "sequence_length": 16, "num_images": 1},
    )
|
29 | 40 | device="cpu", |
30 | 41 | model=model, |
31 | 42 | no_weights=True, |
32 | | - quantization_config={"bits": 8, "weight_only": True, "num_samples": 1}, |
| 43 | + quantization_config={"bits": 8, "num_samples": 1, "weight_only": True}, |
33 | 44 | ), |
34 | 45 | "openvino-8bit-static": OpenVINOConfig( |
35 | 46 | device="cpu", |
36 | 47 | model=model, |
37 | 48 | no_weights=True, |
38 | | - quantization_config={"n_bits": 8, "weight_only": False, "num_samples": 1}, |
| 49 | + quantization_config={"bits": 8, "num_samples": 1, "dataset": "contextual"}, |
39 | 50 | ), |
40 | 51 | } |
41 | 52 |
|
42 | | - results = {} |
43 | 53 | for config_name, backend_config in backend_configs.items(): |
44 | 54 | benchmark_config = BenchmarkConfig( |
45 | 55 | name=f"{config_name}", |
|
48 | 58 | backend=backend_config, |
49 | 59 | ) |
50 | 60 | benchmark_report = Benchmark.launch(benchmark_config) |
51 | | - benchmark_report.save_json(f"{config_name}_vlm_benchmark_report.json") |
52 | | - results[config_name] = benchmark_report |
53 | | - |
54 | | - for config_name, benchmark_report in results.items(): |
55 | | - print("-" * 80) |
56 | | - print(f"Results for {config_name}:") |
57 | | - print("- Prefill Metrics:") # prefill = the processing of the input (text + image) to produce the first token |
58 | | - benchmark_report.prefill.log() |
59 | | - print("- Decode Metrics:") # decode = the processing of subsequent tokens |
60 | | - benchmark_report.decode.log() |
| 61 | + # benchmark_report.to_json(f"{config_name}_report.json") |
| 62 | + benchmark_report.push_to_hub(repo_id="IlyasMoutawwakil/vlm_benchmark", filename=f"{config_name}_report") |
| 63 | + |
| 64 | + backend_reports = {} |
| 65 | + for config_name in backend_configs.keys(): |
| 66 | + # backend_reports[config_name] = BenchmarkReport.from_json(f"{config_name}_report.json") |
| 67 | + backend_reports[config_name] = BenchmarkReport.from_hub( |
| 68 | + repo_id="IlyasMoutawwakil/vlm_benchmark", filename=f"{config_name}_report" |
| 69 | + ) |
| 70 | + |
| 71 | + _, ax = plt.subplots() |
| 72 | + ax.boxplot( |
| 73 | + [backend_reports[config_name].prefill.latency.values for config_name in backend_reports.keys()], |
| 74 | + tick_labels=backend_reports.keys(), |
| 75 | + showfliers=False, |
| 76 | + ) |
| 77 | + plt.xticks(rotation=10) |
| 78 | + ax.set_ylabel("Latency (s)") |
| 79 | + ax.set_xlabel("Configurations") |
| 80 | + ax.set_title("Prefill Latencies") |
| 81 | + plt.savefig("prefill_latencies_boxplot.png") |
| 82 | + |
| 83 | + _, ax = plt.subplots() |
| 84 | + ax.boxplot( |
| 85 | + [backend_reports[config_name].per_token.latency.values for config_name in backend_reports.keys()], |
| 86 | + tick_labels=backend_reports.keys(), |
| 87 | + showfliers=False, |
| 88 | + ) |
| 89 | + plt.xticks(rotation=10) |
| 90 | + ax.set_ylabel("Latency (s)") |
| 91 | + ax.set_xlabel("Configurations") |
| 92 | + ax.set_title("Per-token Latencies") |
| 93 | + plt.savefig("per_token_latencies_boxplot.png") |
| 94 | + |
| 95 | + _, ax = plt.subplots() |
| 96 | + ax.bar( |
| 97 | + list(backend_reports.keys()), |
| 98 | + [backend_reports[config_name].generate.memory.max_ram for config_name in backend_reports.keys()], |
| 99 | + color=["C0", "C1", "C2", "C3", "C4", "C5"], |
| 100 | + ) |
| 101 | + plt.xticks(rotation=10) |
| 102 | + ax.set_title("Max RAM") |
| 103 | + ax.set_ylabel("RAM (MB)") |
| 104 | + ax.set_xlabel("Configurations") |
| 105 | + plt.savefig("max_ram_barplot.png") |
0 commit comments