diff --git a/Makefile b/Makefile
index a2ed997c..55e44e1e 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,9 @@ build_docker_cpu:
 build_docker_cuda:
 	docker build -f docker/cuda.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) --build-arg TORCH_CUDA=cu118 --build-arg CUDA_VERSION=11.8.0 -t opt-bench-cuda:11.8.0 .
 
+build_docker_rocm:
+	docker build -f docker/rocm.dockerfile --build-arg USER_ID=$(id -u) --build-arg GROUP_ID=$(id -g) --build-arg TORCH_ROCM=rocm5.6 --build-arg ROCM_VERSION=5.6.1 -t opt-bench-rocm:5.6.1 .
+
 test_cli_cpu_neural_compressor:
 	docker run \
 	--rm \
diff --git a/README.md b/README.md
index cc623d27..e338b888 100644
--- a/README.md
+++ b/README.md
@@ -3,14 +3,13 @@

Optimum-Benchmark 🏋️

-Optimum-Benchmark is a unified multi-backend utility for benchmarking [Transformers](https://github.com/huggingface/transformers), [Diffusers](https://github.com/huggingface/diffusers), [PEFT](https://github.com/huggingface/peft), [TIMM](https://github.com/huggingface/pytorch-image-models) and [Optimum](https://github.com/huggingface/optimum) flavors, along with supported optimizations & quantization schemes, for [inference](https://github.com/huggingface/optimum#accelerated-inference) & [training](https://github.com/huggingface/optimum#accelerated-training), on multiple [backends & hardwares](https://github.com/huggingface/optimum-benchmark?tab=readme-ov-file#supported-backendsdevices).
+Optimum-Benchmark is a unified [multi-backend & multi-device](#backends--devices-) utility for benchmarking [Transformers](https://github.com/huggingface/transformers), [Diffusers](https://github.com/huggingface/diffusers), [PEFT](https://github.com/huggingface/peft), [TIMM](https://github.com/huggingface/pytorch-image-models) and [Optimum](https://github.com/huggingface/optimum) flavors, along with all their supported [optimizations & quantization schemes](#backend-features-), for [inference & training](#benchmark-features-%EF%B8%8F), in [distributed & non-distributed settings](#backend-features-).
 
 ## Motivation 🤔
 
-- Hardware vendors wanting to know how their hardware performs compared to others on the same models.
-- HF ecosystem users wanting to know how their chosen model performs in terms of latency, throughput, memory usage, energy consumption, etc.
+- HF hardware partners wanting to know how their hardware performs compared to other hardware on the same models.
+- HF ecosystem users wanting to know how their chosen model performs, in terms of latency, throughput, memory usage, energy consumption, etc., compared to other models.
 - Experimenting with hardware & backend specific optimizations & quantization schemes that can be applied to models and improve their computational/memory/energy efficiency.
-- [...]
 
 ## Current status 📈
 
@@ -19,23 +18,20 @@ Optimum-Benchmark is a unified multi-backend utility for benchmarking [Transform
 [![CPU](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cpu.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cpu.yaml)
 [![CUDA](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cuda.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_cuda.yaml)
 [![ROCM](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_rocm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_rocm.yaml)
-[![MISC](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_misc.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_api_misc.yaml)
 
 ### CLI
+
 [![CPU Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_pytorch.yaml)
 [![CPU OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_onnxruntime.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_onnxruntime.yaml)
 [![CPU Intel Neural Compressor Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_neural_compressor.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_neural_compressor.yaml)
 [![CPU OpenVINO Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_openvino.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cpu_openvino.yaml)
-
 [![CUDA Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_pytorch.yaml)
-[![CUDA OnnxRuntime Inference Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_onnxruntime_inference.yaml)
-[![CUDA Torch-ORT Training Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_torch_ort_training.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_torch_ort_training.yaml)
-
-[![TensorRT OnnxRuntime Inference Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_onnxruntime_inference.yaml)
+[![CUDA OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_onnxruntime_inference.yaml)
+[![CUDA Torch-ORT Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_torch_ort_training.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_cuda_torch_ort_training.yaml)
+[![TensorRT OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_onnxruntime_inference.yaml)
 [![TensorRT-LLM Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_llm.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_tensorrt_llm.yaml)
-
 [![ROCm Pytorch Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_pytorch.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_pytorch.yaml)
-[![ROCm OnnxRuntime Inference Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_onnxruntime_inference.yaml)
+[![ROCm OnnxRuntime Tests](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_onnxruntime_inference.yaml/badge.svg)](https://github.com/huggingface/optimum-benchmark/actions/workflows/test_rocm_onnxruntime_inference.yaml)
 
 ## Quickstart 🚀
 
@@ -44,7 +40,7 @@ Optimum-Benchmark is a unified multi-backend utility for benchmarking [Transform
 You can install `optimum-benchmark` using pip:
 
 ```bash
-python -m pip install git+https://github.com/huggingface/optimum-benchmark.git
+pip install optimum-benchmark
 ```
 
 or by cloning the repository and installing it in editable mode:
 
@@ -66,33 +62,45 @@ Depending on the backends you want to use, you might need to install some extra
 - Intel Neural Compressor: `pip install optimum-benchmark[neural-compressor]`
 - Text Generation Inference: `pip install optimum-benchmark[text-generation-inference]`
 
-### Running benchmarks from python API 🧪
+### Running benchmarks from Python API 🧪
 
-You can run benchmarks from the python API:
+You can run benchmarks from the Python API, using the `launch` function from the `optimum_benchmark.experiment` module. Here's an example of how to run a benchmark using the `pytorch` backend, `process` launcher and `inference` benchmark.
 
 ```python
-import logging
-logging.basicConfig(level=logging.INFO)
-
+from optimum_benchmark.logging_utils import setup_logging
 from optimum_benchmark.experiment import launch, ExperimentConfig
 from optimum_benchmark.backends.pytorch.config import PyTorchConfig
 from optimum_benchmark.launchers.process.config import ProcessConfig
 from optimum_benchmark.benchmarks.inference.config import InferenceConfig
+
 if __name__ == "__main__":
-    backend_config = PyTorchConfig(model="gpt2", no_weights=True, device="cuda")
-    launcher_config = ProcessConfig(device_isolation=True)
-    benchmark_config = InferenceConfig(memory=True)
+    setup_logging(level="INFO")
+    benchmark_config = InferenceConfig(latency=False, memory=True, energy=True)
+    launcher_config = ProcessConfig()
+    backend_config = PyTorchConfig(
+        device="cuda",
+        no_weights=True,
+        device_ids="0,1",
+        device_map="auto",
+        model="IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm",
+    )
     experiment_config = ExperimentConfig(
-        experiment_name="api-launch-experiment",
+        experiment_name="python-api-launch-experiment",
         benchmark=benchmark_config,
         launcher=launcher_config,
         backend=backend_config,
     )
     benchmark_report = launch(experiment_config)
-    print("benchmark_report:", benchmark_report)
+    benchmark_report.log_all()
+    # or
+    print(benchmark_report.to_dict())
+    # or
+    benchmark_report.push_to_hub("IlyasMoutawwakil/vicuna-7b-v1.5-awq-gemm")
 ```
 
+Yep, it's that simple! Check the supported backends, launchers and benchmarks in the [features](#features-) section.
+
 ### Running benchmarks from CLI 🏃‍♂️
 
 You can run a benchmark using the command line by specifying the configuration directory and the configuration name. Both arguments are mandatory for [`hydra`](https://hydra.cc/). `--config-dir` is the directory where the configuration files are stored and `--config-name` is the name of the configuration file without its `.yaml` extension.
 
@@ -161,26 +169,26 @@ Other than the [examples](examples), you can also check [tests](tests/configs/).
 
 Everything else is optional or inferred at runtime, but can be configured to your needs.
 
-### Backends & Devices 📱
-
-- [x] Pytorch backend for CPU (`device=cpu`, `backend=pytorch`)
-- [x] Pytorch backend for CUDA (`device=cuda`, `backend=pytorch`)
-- [ ] Pytorch backend for Habana Gaudi Processor (`device=hpu`, `backend=pytorch`)
-- [x] OnnxRuntime backend for CPUExecutionProvider (`device=cpu`, `backend=onnxruntime`)
-- [x] OnnxRuntime backend for CUDAExecutionProvider (`device=cuda`, `backend=onnxruntime`)
-- [x] OnnxRuntime backend for ROCMExecutionProvider (`device=cuda`, `backend=onnxruntime`, `backend.provider=ROCMExecutionProvider`)
-- [x] OnnxRuntime backend for TensorrtExecutionProvider (`device=cuda`, `backend=onnxruntime`, `backend.provider=TensorrtExecutionProvider`)
-- [x] Intel Neural Compressor backend for CPU (`device=cpu`, `backend=neural-compressor`)
-- [x] TensorRT-LLM backend for CUDA (`device=cuda`, `backend=tensorrt-llm`)
-- [x] OpenVINO backend for CPU (`device=cpu`, `backend=openvino`)
-
-### Launcher features 🚀
+### Launchers 🚀
 
 - [x] Process isolation between consecutive runs (`launcher=process`)
-- [x] Assert devices (NVIDIA & AMD GPUs) isolation (`launcher.device_isolation=true`)
-- [x] Distributed inference/training (`launcher=torchrun`, `launcher.n_proc_per_node=2`, etc)
+- [x] Assert GPU device isolation (NVIDIA & AMD) (`launcher.device_isolation=true`)
+- [x] Distributed inference/training (`launcher=torchrun`, `launcher.n_proc_per_node=2`)
+
+### Backends & Devices 📱
 
-### Benchmark features 🏋️
+- [x] Pytorch backend for CPU (`backend=pytorch`, `backend.device=cpu`)
+- [x] Pytorch backend for CUDA (`backend=pytorch`, `backend.device=cuda`)
+- [ ] Pytorch backend for Habana Gaudi Processor (`backend=pytorch`, `backend.device=habana`)
+- [x] OnnxRuntime backend for CPUExecutionProvider (`backend=onnxruntime`, `backend.device=cpu`)
+- [x] OnnxRuntime backend for CUDAExecutionProvider (`backend=onnxruntime`, `backend.device=cuda`)
+- [x] OnnxRuntime backend for ROCMExecutionProvider (`backend=onnxruntime`, `backend.device=cuda`, `backend.provider=ROCMExecutionProvider`)
+- [x] OnnxRuntime backend for TensorrtExecutionProvider (`backend=onnxruntime`, `backend.device=cuda`, `backend.provider=TensorrtExecutionProvider`)
+- [x] Intel Neural Compressor backend for CPU (`backend=neural-compressor`, `backend.device=cpu`)
+- [x] TensorRT-LLM backend for CUDA (`backend=tensorrt-llm`, `backend.device=cuda`)
+- [x] OpenVINO backend for CPU (`backend=openvino`, `backend.device=cpu`)
+
+### Benchmarking 🏋️
 
 - [x] Memory tracking (`benchmark.memory=true`)
 - [x] Latency and throughput tracking of forward pass (default)
diff --git a/optimum_benchmark/benchmarks/report.py b/optimum_benchmark/benchmarks/report.py
index 4aed2eeb..69491d65 100644
--- a/optimum_benchmark/benchmarks/report.py
+++ b/optimum_benchmark/benchmarks/report.py
@@ -10,9 +10,6 @@
 
 @dataclass
 class BenchmarkReport(PushToHubMixin):
-    def to_dict(self) -> dict:
-        return asdict(self)
-
     def save_pretrained(
         self,
         save_directory: Union[str, os.PathLike],
@@ -50,6 +47,9 @@ def save_pretrained(
             token=kwargs.get("token"),
         )
 
+    def to_dict(self) -> dict:
+        return asdict(self)
+
     def to_flat_dict(self) -> dict:
         report_dict = self.to_dict()
         return flatten(report_dict, reducer="dot")
diff --git a/tests/test_api.py b/tests/test_api.py
index 1cfc2a96..b8367903 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -48,9 +48,9 @@
     TrainingConfig(latency=True, memory=True, energy=True),
 ]
 LAUNCHER_CONFIGS = [
-    InlineConfig(device_isolation=False),
-    ProcessConfig(device_isolation=False),
     TorchrunConfig(nproc_per_node=2, device_isolation=False),
+    ProcessConfig(device_isolation=False),
+    InlineConfig(device_isolation=False),
 ]
@@ -144,25 +144,12 @@ def test_api_dataset_generator(library, task, model):
     _ = generator()
 
 
-@pytest.mark.parametrize("launcher_config", LAUNCHER_CONFIGS)
-def test_api_launchers(launcher_config):
-    backend_config = PyTorchConfig(model="gpt2", no_weights=True, device="cpu")
-    benchmark_config = InferenceConfig(memory=True)
-    experiment_config = ExperimentConfig(
-        experiment_name="api-launch-experiment",
-        benchmark=benchmark_config,
-        launcher=launcher_config,
-        backend=backend_config,
-    )
-    _ = launch(experiment_config)
-
-
 @pytest.mark.parametrize("benchmark_config", BENCHMARK_CONFIGS)
 @pytest.mark.parametrize("launcher_config", LAUNCHER_CONFIGS)
-@pytest.mark.parametrize("backend_config", [PyTorchConfig(model="gpt2", no_weights=True, device="cpu")])
-def test_api_benchmarks(benchmark_config, launcher_config, backend_config):
+def test_api_launch_cpu(benchmark_config, launcher_config):
+    backend_config = PyTorchConfig(model="bert-base-uncased", no_weights=True, device="cpu")
     experiment_config = ExperimentConfig(
-        experiment_name="api-benchmark-experiment",
+        experiment_name="",
         benchmark=benchmark_config,
         launcher=launcher_config,
         backend=backend_config,
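For context on the `to_dict`/`to_flat_dict` pair that the report.py hunk reorders: below is a minimal, self-contained sketch of the same serialization pattern on a toy dataclass. It assumes `flatten` comes from the `flatten-dict` package (the import is not shown in this hunk), and `ToyReport` with its fields is a hypothetical stand-in, not the real `BenchmarkReport` schema.

```python
from dataclasses import asdict, dataclass, field
from typing import Dict

from flatten_dict import flatten  # assumed source of the `flatten` used in report.py


@dataclass
class ToyReport:
    # hypothetical nested measurements, standing in for the real report sections
    memory: Dict[str, float] = field(default_factory=lambda: {"max_ram_mb": 1024.0})
    latency: Dict[str, float] = field(default_factory=lambda: {"mean_ms": 12.5})

    def to_dict(self) -> dict:
        # dataclass -> nested plain dict, as in the hunk above
        return asdict(self)

    def to_flat_dict(self) -> dict:
        # the "dot" reducer joins nested keys: {"memory": {"max_ram_mb": ...}} -> {"memory.max_ram_mb": ...}
        return flatten(self.to_dict(), reducer="dot")


if __name__ == "__main__":
    report = ToyReport()
    print(report.to_dict())       # {'memory': {'max_ram_mb': 1024.0}, 'latency': {'mean_ms': 12.5}}
    print(report.to_flat_dict())  # {'memory.max_ram_mb': 1024.0, 'latency.mean_ms': 12.5}
```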
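The tests/test_api.py hunk folds the old launcher-only test into `test_api_launch_cpu`, which stacks two `@pytest.mark.parametrize` decorators so every benchmark config is exercised with every launcher config. A minimal sketch of that pattern, using placeholder strings instead of the real config classes:

```python
import pytest

# hypothetical stand-ins for BENCHMARK_CONFIGS and LAUNCHER_CONFIGS in tests/test_api.py
BENCHMARK_CONFIGS = ["inference", "training"]
LAUNCHER_CONFIGS = ["torchrun", "process", "inline"]


@pytest.mark.parametrize("benchmark_config", BENCHMARK_CONFIGS)
@pytest.mark.parametrize("launcher_config", LAUNCHER_CONFIGS)
def test_launch_grid(benchmark_config, launcher_config):
    # stacked parametrize decorators collect the Cartesian product: 3 launchers x 2 benchmarks = 6 cases
    assert benchmark_config in BENCHMARK_CONFIGS
    assert launcher_config in LAUNCHER_CONFIGS
```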