Skip to content

Commit 028631e

Browse files
finish benchmark
1 parent a5beb32 commit 028631e

File tree

4 files changed

+68
-24
lines changed

4 files changed

+68
-24
lines changed

optimum_benchmark/hub_utils.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -110,9 +110,7 @@ def push_to_hub(
110110
)
111111

112112
@classmethod
113-
def from_pretrained(
114-
cls, repo_id: str, filename: Optional[str] = None, subfolder: Optional[str] = None, **kwargs
115-
) -> Self:
113+
def from_hub(cls, repo_id: str, filename: Optional[str] = None, subfolder: Optional[str] = None, **kwargs) -> Self:
116114
filename = str(filename or cls.default_filename)
117115
subfolder = str(subfolder or cls.default_subfolder)
118116

optimum_benchmark/scenarios/inference/scenario.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -241,6 +241,7 @@ def run_text_generation_memory_tracking(self):
241241
with self.memory_tracker.track():
242242
self.backend.generate(self.inputs, self.config.generate_kwargs)
243243

244+
self.report.generate.memory = self.memory_tracker.get_max_memory()
244245
self.report.decode.memory = self.memory_tracker.get_max_memory()
245246

246247
def run_image_diffusion_memory_tracking(self):

tests/test_api.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -163,7 +163,7 @@ def test_api_push_to_hub_mixin():
163163

164164
# Hugging Face Hub API
165165
artifact.push_to_hub(repo_id=PUSH_REPO_ID, subfolder=benchmark_name)
166-
from_hub_artifact = artifact.__class__.from_pretrained(repo_id=PUSH_REPO_ID, subfolder=benchmark_name)
166+
from_hub_artifact = artifact.__class__.from_hub(repo_id=PUSH_REPO_ID, subfolder=benchmark_name)
167167
assert from_hub_artifact.to_dict() == artifact.to_dict()
168168

169169

uv_scripts/cpu_openvino_vlm.py

Lines changed: 65 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -1,24 +1,35 @@
11
# /// script
22
# dependencies = [
3-
# "optimum-benchmark[openvino]==0.7",
4-
# "transformers==4.53",
3+
# "optimum-benchmark[openvino]@git+https://github.com/huggingface/optimum-benchmark.git@main",
4+
# "optimum-intel@git+https://github.com/huggingface/optimum-intel.git@main",
5+
# "transformers==4.55",
56
# "torchvision",
67
# "num2words",
78
# ]
89
# ///
910

10-
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, OpenVINOConfig, ProcessConfig, PyTorchConfig
11-
from optimum_benchmark.logging_utils import setup_logging
12-
13-
setup_logging(level="INFO", to_file=True, prefix="OPTIMUM-BENCHMARK")
11+
import matplotlib.pyplot as plt
1412

13+
from optimum_benchmark import (
14+
Benchmark,
15+
BenchmarkConfig,
16+
BenchmarkReport,
17+
InferenceConfig,
18+
OpenVINOConfig,
19+
ProcessConfig,
20+
PyTorchConfig,
21+
)
22+
from optimum_benchmark.logging_utils import setup_logging
1523

1624
if __name__ == "__main__":
25+
setup_logging(level="INFO", to_file=True, prefix="OPTIMUM-BENCHMARK")
26+
1727
launcher_config = ProcessConfig()
1828
scenario_config = InferenceConfig(
29+
memory=True,
1930
latency=True,
20-
input_shapes={"batch_size": 1, "sequence_length": 16, "num_images": 1},
2131
generate_kwargs={"max_new_tokens": 16, "min_new_tokens": 16},
32+
input_shapes={"batch_size": 1, "sequence_length": 16, "num_images": 1},
2233
)
2334

2435
model = "HuggingFaceTB/SmolVLM2-500M-Video-Instruct"
@@ -29,17 +40,16 @@
2940
device="cpu",
3041
model=model,
3142
no_weights=True,
32-
quantization_config={"bits": 8, "weight_only": True, "num_samples": 1},
43+
quantization_config={"bits": 8, "num_samples": 1, "weight_only": True},
3344
),
3445
"openvino-8bit-static": OpenVINOConfig(
3546
device="cpu",
3647
model=model,
3748
no_weights=True,
38-
quantization_config={"n_bits": 8, "weight_only": False, "num_samples": 1},
49+
quantization_config={"bits": 8, "num_samples": 1, "dataset": "contextual"},
3950
),
4051
}
4152

42-
results = {}
4353
for config_name, backend_config in backend_configs.items():
4454
benchmark_config = BenchmarkConfig(
4555
name=f"{config_name}",
@@ -48,13 +58,48 @@
4858
backend=backend_config,
4959
)
5060
benchmark_report = Benchmark.launch(benchmark_config)
51-
benchmark_report.save_json(f"{config_name}_vlm_benchmark_report.json")
52-
results[config_name] = benchmark_report
53-
54-
for config_name, benchmark_report in results.items():
55-
print("-" * 80)
56-
print(f"Results for {config_name}:")
57-
print("- Prefill Metrics:") # prefill = the processing of the input (text + image) to produce the first token
58-
benchmark_report.prefill.log()
59-
print("- Decode Metrics:") # decode = the processing of subsequent tokens
60-
benchmark_report.decode.log()
61+
# benchmark_report.to_json(f"{config_name}_report.json")
62+
benchmark_report.push_to_hub(repo_id="IlyasMoutawwakil/vlm_benchmark", filename=f"{config_name}_report")
63+
64+
backend_reports = {}
65+
for config_name in backend_configs.keys():
66+
# backend_reports[config_name] = BenchmarkReport.from_json(f"{config_name}_report.json")
67+
backend_reports[config_name] = BenchmarkReport.from_hub(
68+
repo_id="IlyasMoutawwakil/vlm_benchmark", filename=f"{config_name}_report"
69+
)
70+
71+
_, ax = plt.subplots()
72+
ax.boxplot(
73+
[backend_reports[config_name].prefill.latency.values for config_name in backend_reports.keys()],
74+
tick_labels=backend_reports.keys(),
75+
showfliers=False,
76+
)
77+
plt.xticks(rotation=10)
78+
ax.set_ylabel("Latency (s)")
79+
ax.set_xlabel("Configurations")
80+
ax.set_title("Prefill Latencies")
81+
plt.savefig("prefill_latencies_boxplot.png")
82+
83+
_, ax = plt.subplots()
84+
ax.boxplot(
85+
[backend_reports[config_name].per_token.latency.values for config_name in backend_reports.keys()],
86+
tick_labels=backend_reports.keys(),
87+
showfliers=False,
88+
)
89+
plt.xticks(rotation=10)
90+
ax.set_ylabel("Latency (s)")
91+
ax.set_xlabel("Configurations")
92+
ax.set_title("Per-token Latencies")
93+
plt.savefig("per_token_latencies_boxplot.png")
94+
95+
_, ax = plt.subplots()
96+
ax.bar(
97+
list(backend_reports.keys()),
98+
[backend_reports[config_name].generate.memory.max_ram for config_name in backend_reports.keys()],
99+
color=["C0", "C1", "C2", "C3", "C4", "C5"],
100+
)
101+
plt.xticks(rotation=10)
102+
ax.set_title("Max RAM")
103+
ax.set_ylabel("RAM (MB)")
104+
ax.set_xlabel("Configurations")
105+
plt.savefig("max_ram_barplot.png")

0 commit comments

Comments (0)