
Markdown Report #280

Merged: 19 commits, Oct 8, 2024
8 changes: 3 additions & 5 deletions optimum_benchmark/backends/onnxruntime/backend.py
@@ -297,11 +297,9 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs:
inputs = process_inputs

if self.config.library == "transformers":
for key, value in list(inputs.items()):
if key in ["position_ids", "token_type_ids"]:
if key not in self.pretrained_model.input_names:
inputs.pop(key)
for key in list(inputs.keys()):
if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names:
inputs.pop(key)

for key, value in inputs.items():
if isinstance(value, torch.Tensor):
4 changes: 4 additions & 0 deletions optimum_benchmark/backends/openvino/backend.py
@@ -201,6 +201,10 @@ def prepare_inputs(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
with Accelerator().split_between_processes(inputs=inputs, apply_padding=False) as process_inputs:
inputs = process_inputs

for key in list(inputs.keys()):
if hasattr(self.pretrained_model, "input_names") and key not in self.pretrained_model.input_names:
inputs.pop(key)

return inputs

def forward(self, inputs: Dict[str, Any], kwargs: Dict[str, Any]) -> OrderedDict:
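Both backends now drop any prepared input whose key is not among the exported model's `input_names`, instead of special-casing `position_ids` and `token_type_ids` for transformers only. A minimal standalone sketch of that filtering logic; `DummyExportedModel` is an illustrative stand-in, not a library class:

```python
from typing import Any, Dict


class DummyExportedModel:
    # Exported ONNX/OpenVINO models expose the names of their graph inputs;
    # this stand-in mimics that attribute for illustration only.
    input_names = ["input_ids", "attention_mask"]


def filter_inputs(model: Any, inputs: Dict[str, Any]) -> Dict[str, Any]:
    # Drop any input the exported graph does not accept, regardless of
    # which modeling library produced it.
    for key in list(inputs.keys()):
        if hasattr(model, "input_names") and key not in model.input_names:
            inputs.pop(key)
    return inputs


inputs = {"input_ids": [1, 2], "attention_mask": [1, 1], "token_type_ids": [0, 0]}
print(filter_inputs(DummyExportedModel(), inputs))
# {'input_ids': [1, 2], 'attention_mask': [1, 1]}
```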
20 changes: 15 additions & 5 deletions optimum_benchmark/benchmark/base.py
@@ -1,4 +1,5 @@
from dataclasses import dataclass
from logging import getLogger
from typing import TYPE_CHECKING, Type

from hydra.utils import get_class
@@ -16,6 +17,9 @@
from ..scenarios.base import Scenario


LOGGER = getLogger("benchmark")


@dataclass
class Benchmark(PushToHubMixin):
config: BenchmarkConfig
@@ -32,8 +36,8 @@ def __post_init__(self):
elif not isinstance(self.report, BenchmarkReport):
raise ValueError("report must be either a dict or a BenchmarkReport instance")

@classmethod
def launch(cls, config: BenchmarkConfig):
@staticmethod
def launch(config: BenchmarkConfig):
"""
Runs an benchmark using specified launcher configuration/logic
"""
@@ -44,12 +48,18 @@ def launch(cls, config: BenchmarkConfig):
launcher: Launcher = launcher_factory(launcher_config)

# Launch the benchmark using the launcher
report = launcher.launch(worker=cls.run, worker_args=[config])
report = launcher.launch(worker=Benchmark.run, worker_args=[config])

if config.log_report:
report.log()

if config.print_report:
report.print()

return report

@classmethod
def run(cls, config: BenchmarkConfig):
@staticmethod
def run(config: BenchmarkConfig):
"""
Runs a scenario using specified backend configuration/logic
"""
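With `launch` and `run` turned into static methods and the new `log_report`/`print_report` flags wired through the config, a typical entry point would look roughly like the sketch below. The config classes are the library's usual ones, but the top-level imports and field values are assumptions for illustration:

```python
# Rough usage sketch under the new API; field values are illustrative.
from optimum_benchmark import Benchmark, BenchmarkConfig, InferenceConfig, ProcessConfig, PyTorchConfig

config = BenchmarkConfig(
    name="bert_inference",
    backend=PyTorchConfig(model="bert-base-uncased", device="cpu"),
    scenario=InferenceConfig(latency=True, memory=True),
    launcher=ProcessConfig(),
    log_report=True,    # log the report line by line after the run (default)
    print_report=True,  # additionally render it as markdown in the console
)

# launch() no longer needs the class bound as `cls`; it resolves the launcher
# from the config, runs Benchmark.run inside it, and handles report logging/printing.
report = Benchmark.launch(config)
```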
3 changes: 3 additions & 0 deletions optimum_benchmark/benchmark/config.py
@@ -20,6 +20,9 @@ class BenchmarkConfig(PushToHubMixin):
# ENVIRONMENT CONFIGURATION
environment: Dict[str, Any] = field(default_factory=lambda: {**get_system_info(), **get_hf_libs_info()})

print_report: bool = False
log_report: bool = True

@classproperty
def default_filename(cls) -> str:
return "benchmark_config.json"
119 changes: 69 additions & 50 deletions optimum_benchmark/benchmark/report.py
@@ -1,14 +1,21 @@
from dataclasses import dataclass, make_dataclass
from logging import getLogger
from typing import Any, Dict, List, Optional

from rich.console import Console
from rich.markdown import Markdown

from ..hub_utils import PushToHubMixin, classproperty
from ..trackers.energy import Efficiency, Energy
from ..trackers.latency import Latency, Throughput
from ..trackers.memory import Memory

CONSOLE = Console()
LOGGER = getLogger("benchmark")


@dataclass
class BenchmarkMeasurements:
class Measurements:
memory: Optional[Memory] = None
latency: Optional[Latency] = None
throughput: Optional[Throughput] = None
@@ -28,7 +35,7 @@ def __post_init__(self):
self.efficiency = Efficiency(**self.efficiency)

@staticmethod
def aggregate(measurements: List["BenchmarkMeasurements"]) -> "BenchmarkMeasurements":
def aggregate(measurements: List["Measurements"]) -> "Measurements":
assert len(measurements) > 0, "No measurements to aggregate"

m0 = measurements[0]
@@ -39,7 +46,37 @@ def aggregate(measurements: List["BenchmarkMeasurements"]) -> "BenchmarkMeasurem
energy = Energy.aggregate([m.energy for m in measurements]) if m0.energy is not None else None
efficiency = Efficiency.aggregate([m.efficiency for m in measurements]) if m0.efficiency is not None else None

return BenchmarkMeasurements(memory, latency, throughput, energy, efficiency)
return Measurements(memory=memory, latency=latency, throughput=throughput, energy=energy, efficiency=efficiency)

def to_plain_text(self) -> str:
plain_text = ""

for key in ["memory", "latency", "throughput", "energy", "efficiency"]:
measurement = getattr(self, key)
if measurement is not None:
plain_text += f"\t+ {key}:\n"
plain_text += measurement.to_plain_text()

return plain_text

def log(self):
for line in self.to_plain_text().split("\n"):
if line:
LOGGER.info(line)

def to_markdown_text(self) -> str:
markdown_text = ""

for key in ["memory", "latency", "throughput", "energy", "efficiency"]:
measurement = getattr(self, key)
if measurement is not None:
markdown_text += f"## {key}:\n\n"
markdown_text += measurement.to_markdown_text()

return markdown_text

def print(self):
CONSOLE.print(Markdown(self.to_markdown_text()))


@dataclass
@@ -55,63 +92,45 @@ def from_dict(cls, data: Dict[str, Any]) -> "BenchmarkReport":
def __post_init__(self):
for target in self.to_dict().keys():
if getattr(self, target) is None:
setattr(self, target, BenchmarkMeasurements())
setattr(self, target, Measurements())
elif isinstance(getattr(self, target), dict):
setattr(self, target, BenchmarkMeasurements(**getattr(self, target)))

def log_memory(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.memory is not None:
measurements.memory.log(prefix=target)

def log_latency(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.latency is not None:
measurements.latency.log(prefix=target)

def log_throughput(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.throughput is not None:
measurements.throughput.log(prefix=target)

def log_energy(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.energy is not None:
measurements.energy.log(prefix=target)

def log_efficiency(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.efficiency is not None:
measurements.efficiency.log(prefix=target)

def log(self):
for target in self.to_dict().keys():
measurements: BenchmarkMeasurements = getattr(self, target)
if measurements.memory is not None:
measurements.memory.log(prefix=target)
if measurements.latency is not None:
measurements.latency.log(prefix=target)
if measurements.throughput is not None:
measurements.throughput.log(prefix=target)
if measurements.energy is not None:
measurements.energy.log(prefix=target)
if measurements.efficiency is not None:
measurements.efficiency.log(prefix=target)
setattr(self, target, Measurements(**getattr(self, target)))

@classmethod
def aggregate(cls, reports: List["BenchmarkReport"]) -> "BenchmarkReport":
aggregated_measurements = {}
for target in reports[0].to_dict().keys():
measurements = [getattr(report, target) for report in reports]
aggregated_measurements[target] = BenchmarkMeasurements.aggregate(measurements)
aggregated_measurements[target] = Measurements.aggregate(measurements)

return cls.from_dict(aggregated_measurements)

@classproperty
def default_filename(self) -> str:
return "benchmark_report.json"

def to_plain_text(self) -> str:
plain_text = ""

for target in self.to_dict().keys():
plain_text += f"+ {target}:\n"
plain_text += getattr(self, target).to_plain_text()

return plain_text

def log(self):
for line in self.to_plain_text().split("\n"):
if line:
LOGGER.info(line)

def to_markdown_text(self) -> str:
markdown_text = ""

for target in self.to_dict().keys():
markdown_text += f"# {target}:\n\n"
markdown_text += getattr(self, target).to_markdown_text()

return markdown_text

def print(self):
CONSOLE.print(Markdown(self.to_markdown_text()))
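The report now renders itself in two ways: indented plain text emitted line by line through the `benchmark` logger, and markdown rendered by rich's console. A self-contained sketch of both paths, with made-up report content standing in for the real measurements:

```python
# Sketch of the two rendering paths introduced above; the report text is made up.
from logging import basicConfig, getLogger

from rich.console import Console
from rich.markdown import Markdown

basicConfig(level="INFO")
LOGGER = getLogger("benchmark")

plain_text = "+ load:\n\t+ memory:\n\t+ latency:\n+ forward:\n\t+ latency:\n"
markdown_text = "# load:\n\n## memory:\n\n## latency:\n\n# forward:\n\n## latency:\n"

# log(): one logger record per non-empty line of the plain-text report
for line in plain_text.split("\n"):
    if line:
        LOGGER.info(line)

# print(): rich renders the markdown report with headers in the console
Console().print(Markdown(markdown_text))
```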
2 changes: 2 additions & 0 deletions optimum_benchmark/launchers/inline/launcher.py
@@ -13,5 +13,7 @@ def __init__(self, config: InlineConfig):

def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any]) -> BenchmarkReport:
self.logger.warning("The inline launcher is only recommended for debugging purposes and not for benchmarking")

report = worker(*worker_args)

return report
1 change: 0 additions & 1 deletion optimum_benchmark/launchers/process/launcher.py
@@ -70,7 +70,6 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])
elif "report" in response:
self.logger.info("\t+ Received report from isolated process")
report = BenchmarkReport.from_dict(response["report"])
report.log()
else:
raise RuntimeError(f"Received an unexpected response from isolated process: {response}")

2 changes: 0 additions & 2 deletions optimum_benchmark/launchers/torchrun/launcher.py
@@ -102,8 +102,6 @@ def launch(self, worker: Callable[..., BenchmarkReport], worker_args: List[Any])

self.logger.info("\t+ Aggregating reports from all rank processes")
report = BenchmarkReport.aggregate(reports)
report.log()

return report


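The torchrun launcher still aggregates the per-rank reports; only the `report.log()` call moved up into `Benchmark.launch`. Conceptually, aggregation merges each target's measurements across ranks, as in this purely illustrative sketch (the real code delegates to `Measurements.aggregate`; the sample values and mean-based reduction here are assumptions):

```python
# Purely illustrative: what aggregating a latency measurement across ranks amounts to.
per_rank_latencies_s = {
    0: [0.012, 0.011, 0.013],  # samples collected by rank 0
    1: [0.014, 0.012, 0.012],  # samples collected by rank 1
}

# Merge all ranks' samples, then summarize them once for the final report.
all_samples = [s for samples in per_rank_latencies_s.values() for s in samples]
mean_latency_s = sum(all_samples) / len(all_samples)
print(f"aggregated mean latency: {mean_latency_s * 1e3:.2f} ms")
```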
62 changes: 23 additions & 39 deletions optimum_benchmark/scenarios/inference/scenario.py
@@ -13,7 +13,7 @@
from ..base import Scenario
from .config import InferenceConfig

PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor"]
PER_TOKEN_BACKENDS = ["pytorch", "onnxruntime", "openvino", "neural-compressor", "ipex"]

TEXT_GENERATION_DEFAULT_KWARGS = {
"num_return_sequences": 1,
@@ -91,24 +91,15 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
self.logger.info("\t+ Preparing inputs for Inference")
self.inputs = backend.prepare_inputs(inputs=self.inputs)

if self.config.memory:
if backend.config.task in TEXT_GENERATION_TASKS:
self.run_text_generation_memory_tracking(backend)
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
self.run_image_diffusion_memory_tracking(backend)
else:
self.run_inference_memory_tracking(backend)

self.report.log_memory()

if self.config.latency or self.config.energy:
# latency and energy are metrics that require some warmup
if backend.config.task in TEXT_GENERATION_TASKS:
self.warmup_text_generation(backend)
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
self.warmup_image_diffusion(backend)
else:
self.warmup_inference(backend)
if self.config.warmup_runs > 0:
if backend.config.task in TEXT_GENERATION_TASKS:
self.warmup_text_generation(backend)
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
self.warmup_image_diffusion(backend)
else:
self.warmup_inference(backend)

if self.config.latency:
if backend.config.task in TEXT_GENERATION_TASKS:
@@ -121,8 +112,13 @@
else:
self.run_latency_inference_tracking(backend)

self.report.log_latency()
self.report.log_throughput()
if self.config.memory:
if backend.config.task in TEXT_GENERATION_TASKS:
self.run_text_generation_memory_tracking(backend)
elif backend.config.task in IMAGE_DIFFUSION_TASKS:
self.run_image_diffusion_memory_tracking(backend)
else:
self.run_inference_memory_tracking(backend)

if self.config.energy:
if backend.config.task in TEXT_GENERATION_TASKS:
@@ -132,11 +128,9 @@ def run(self, backend: Backend[BackendConfigT]) -> BenchmarkReport:
else:
self.run_inference_energy_tracking(backend)

self.report.log_energy()
self.report.log_efficiency()

return self.report

# Warmup
def warmup_text_generation(self, backend: Backend[BackendConfigT]):
self.logger.info("\t+ Warming up backend for Text Generation")
_ = backend.generate(self.inputs, self.config.generate_kwargs)
@@ -158,35 +152,25 @@ def warmup_inference(self, backend: Backend[BackendConfigT]):
def run_model_loading_tracking(self, backend: Backend[BackendConfigT]):
self.logger.info("\t+ Running model loading tracking")

if self.config.latency:
latency_tracker = LatencyTracker(backend=backend.config.name, device=backend.config.device)
if self.config.memory:
memory_tracker = MemoryTracker(
backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids
)
if self.config.energy:
energy_tracker = EnergyTracker(
backend=backend.config.name, device=backend.config.device, device_ids=backend.config.device_ids
)

context_stack = ExitStack()
if self.config.latency:
context_stack.enter_context(latency_tracker.track())
if self.config.memory:
context_stack.enter_context(memory_tracker.track())
if self.config.energy:
context_stack.enter_context(energy_tracker.track())
latency_tracker = LatencyTracker(backend=backend.config.name, device=backend.config.device)

with ExitStack() as context_stack:
if self.config.memory:
context_stack.enter_context(memory_tracker.track())
if self.config.latency:
context_stack.enter_context(latency_tracker.track())

with context_stack:
self.logger.info("\t+ Loading model for Inference")
backend.load()

if self.config.latency:
self.report.load.latency = latency_tracker.get_latency()
if self.config.memory:
self.report.load.memory = memory_tracker.get_max_memory()
if self.config.energy:
self.report.load.energy = energy_tracker.get_energy()

## Memory tracking
def run_text_generation_memory_tracking(self, backend: Backend[BackendConfigT]):
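Model-loading tracking now instantiates only the trackers the scenario asks for and enters them through a single `with ExitStack()` block instead of a manually managed stack. A self-contained sketch of that conditional context-manager pattern, with dummy context managers standing in for the memory/latency trackers:

```python
from contextlib import ExitStack, contextmanager


@contextmanager
def dummy_tracker(name: str):
    # Stand-in for MemoryTracker.track() / LatencyTracker.track().
    print(f"start tracking {name}")
    yield
    print(f"stop tracking {name}")


track_memory, track_latency = True, False

with ExitStack() as stack:
    # Enter only the trackers the scenario config enabled; the stack
    # guarantees every entered tracker is exited when the block ends.
    if track_memory:
        stack.enter_context(dummy_tracker("memory"))
    if track_latency:
        stack.enter_context(dummy_tracker("latency"))

    print("loading model...")  # the tracked work happens inside the stack
```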