Skip to content

Commit

Permalink
Conformance: nncf.quantize_pt2e and OpenVINOQuantizer support
Browse files Browse the repository at this point in the history
  • Loading branch information
daniil-lyakhov committed Dec 2, 2024
1 parent 500e549 commit 9e90443
Show file tree
Hide file tree
Showing 7 changed files with 190 additions and 19 deletions.
5 changes: 5 additions & 0 deletions tests/post_training/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ def pytest_addoption(parser):
parser.addoption("--fp32", action="store_true", help="Test original model")
parser.addoption("--cuda", action="store_true", help="Enable CUDA_TORCH backend")
parser.addoption("--benchmark", action="store_true", help="Run benchmark_app")
parser.addoption(
"--validate-in-backend",
action="store_true",
help="Validate quantized model in native backend, not in openvino.",
)
parser.addoption(
"--extra-columns",
action="store_true",
Expand Down
32 changes: 32 additions & 0 deletions tests/post_training/data/ptq_reference_data.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ torchvision/resnet18_backend_CUDA_TORCH:
metric_value: 0.69152
torchvision/resnet18_backend_FX_TORCH:
metric_value: 0.6946
torchvision/resnet18_backend_OV_QUANTIZER_NNCF:
metric_value: 0.6946
torchvision/resnet18_backend_OV_QUANTIZER_AO:
metric_value: 0.6946
torchvision/resnet18_backend_X86_QUANTIZER_NNCF:
metric_value: 0.6946
torchvision/resnet18_backend_X86_QUANTIZER_AO:
metric_value: 0.6946
torchvision/mobilenet_v3_small_BC_backend_FP32:
metric_value: 0.6766
torchvision/mobilenet_v3_small_BC_backend_OV:
Expand All @@ -46,18 +54,42 @@ torchvision/mobilenet_v3_small_BC_backend_ONNX:
metric_value: 0.6679
torchvision/mobilenet_v3_small_BC_backend_FX_TORCH:
metric_value: 0.6679
torchvision/mobilenet_v3_small_BC_backend_OV_QUANTIZER_NNCF:
metric_value: 0.6679
torchvision/mobilenet_v3_small_BC_backend_OV_QUANTIZER_AO:
metric_value: 0.6679
torchvision/mobilenet_v3_small_BC_backend_X86_QUANTIZER_NNCF:
metric_value: 0.6679
torchvision/mobilenet_v3_small_BC_backend_X86_QUANTIZER_AO:
metric_value: 0.6679
torchvision/vit_b_16_backend_FP32:
metric_value: 0.8107
torchvision/vit_b_16_backend_OV:
metric_value: 0.80948
torchvision/vit_b_16_backend_FX_TORCH:
metric_value: 0.80922
torchvision/vit_b_16_backend_OV_QUANTIZER_NNCF:
metric_value: 0.80922
torchvision/vit_b_16_backend_OV_QUANTIZER_AO:
metric_value: 0.80922
torchvision/vit_b_16_backend_X86_QUANTIZER_NNCF:
metric_value: 0.80922
torchvision/vit_b_16_backend_X86_QUANTIZER_AO:
metric_value: 0.80922
torchvision/swin_v2_s_backend_FP32:
metric_value: 0.83712
torchvision/swin_v2_s_backend_OV:
metric_value: 0.83638
torchvision/swin_v2_s_backend_FX_TORCH:
metric_value: 0.8360
torchvision/swin_v2_s_backend_OV_QUANTIZER_NNCF:
metric_value: 0.8360
torchvision/swin_v2_s_backend_OV_QUANTIZER_AO:
metric_value: 0.8360
torchvision/swin_v2_s_backend_X86_QUANTIZER_NNCF:
metric_value: 0.8360
torchvision/swin_v2_s_backend_X86_QUANTIZER_AO:
metric_value: 0.8360
timm/crossvit_9_240_backend_CUDA_TORCH:
metric_value: 0.7275
timm/crossvit_9_240_backend_FP32:
Expand Down
9 changes: 5 additions & 4 deletions tests/post_training/model_scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from nncf.quantization.advanced_parameters import AdvancedScaleEstimationParameters
from nncf.quantization.advanced_parameters import AdvancedSmoothQuantParameters
from tests.post_training.pipelines.base import ALL_PTQ_BACKENDS
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import NNCF_PTQ_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.causal_language_model import CausalLMHF
Expand Down Expand Up @@ -87,7 +88,7 @@
"model_id": "resnet18",
"pipeline_cls": ImageClassificationTorchvision,
"compression_params": {},
"backends": [BackendType.FX_TORCH, BackendType.TORCH, BackendType.CUDA_TORCH, BackendType.OV, BackendType.ONNX],
"backends": FX_BACKENDS + [BackendType.TORCH, BackendType.CUDA_TORCH, BackendType.OV, BackendType.ONNX],
"batch_size": 128,
},
{
Expand All @@ -98,7 +99,7 @@
"fast_bias_correction": False,
"preset": QuantizationPreset.MIXED,
},
"backends": [BackendType.FX_TORCH, BackendType.OV, BackendType.ONNX],
"backends": FX_BACKENDS + [BackendType.OV, BackendType.ONNX],
"batch_size": 128,
},
{
Expand All @@ -109,7 +110,7 @@
"model_type": ModelType.TRANSFORMER,
"advanced_parameters": AdvancedQuantizationParameters(smooth_quant_alpha=0.15),
},
"backends": [BackendType.FX_TORCH, BackendType.OV],
"backends": FX_BACKENDS + [BackendType.OV],
"batch_size": 1,
},
{
Expand All @@ -120,7 +121,7 @@
"model_type": ModelType.TRANSFORMER,
"advanced_parameters": AdvancedQuantizationParameters(smooth_quant_alpha=0.5),
},
"backends": [BackendType.FX_TORCH, BackendType.OV],
"backends": FX_BACKENDS + [BackendType.OV],
"batch_size": 1,
},
# Timm models
Expand Down
17 changes: 15 additions & 2 deletions tests/post_training/pipelines/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ class BackendType(Enum):
TORCH = "TORCH"
CUDA_TORCH = "CUDA_TORCH"
FX_TORCH = "FX_TORCH"
OV_QUANTIZER_NNCF = "OV_QUANTIZER_NNCF"
OV_QUANTIZER_AO = "OV_QUANTIZER_AO"
X86_QUANTIZER_NNCF = "X86_QUANTIZER_NNCF"
X86_QUANTIZER_AO = "X86_QUANTIZER_AO"
ONNX = "ONNX"
OV = "OV"
OPTIMUM = "OPTIMUM"
Expand All @@ -52,6 +56,13 @@ class BackendType(Enum):
NNCF_PTQ_BACKENDS = [BackendType.TORCH, BackendType.CUDA_TORCH, BackendType.ONNX, BackendType.OV]
ALL_PTQ_BACKENDS = NNCF_PTQ_BACKENDS
PT_BACKENDS = [BackendType.TORCH, BackendType.CUDA_TORCH]
# Backends that the pipelines treat as torch.fx-based: the FX_TORCH backend
# plus the four quantizer/flow combinations, i.e. {OV, X86} quantizer paired
# with either the NNCF or the AO quantization flow.
FX_BACKENDS = [
BackendType.FX_TORCH,
BackendType.OV_QUANTIZER_NNCF,
BackendType.OV_QUANTIZER_AO,
BackendType.X86_QUANTIZER_NNCF,
BackendType.X86_QUANTIZER_AO,
]
OV_BACKENDS = [BackendType.OV, BackendType.OPTIMUM]

LIMIT_LENGTH_OF_STATUS = 120
Expand Down Expand Up @@ -211,6 +222,7 @@ def __init__(
reference_data: dict,
no_eval: bool,
run_benchmark_app: bool,
validate_in_backend: bool = False,
params: dict = None,
batch_size: int = 1,
memory_monitor: bool = False,
Expand All @@ -227,6 +239,7 @@ def __init__(
self.memory_monitor = memory_monitor
self.no_eval = no_eval
self.run_benchmark_app = run_benchmark_app
self.validate_in_backend = validate_in_backend
self.output_model_dir: Path = self.output_dir / self.reported_name / self.backend.value
self.output_model_dir.mkdir(parents=True, exist_ok=True)
self.model_name = f"{self.reported_name}_{self.backend.value}"
Expand Down Expand Up @@ -405,8 +418,8 @@ def save_compressed_model(self) -> None:
)
self.path_compressed_ir = self.output_model_dir / "model.xml"
ov.serialize(ov_model, self.path_compressed_ir)
elif self.backend == BackendType.FX_TORCH:
exported_model = torch.export.export(self.compressed_model, (self.dummy_tensor,))
elif self.backend in FX_BACKENDS:
exported_model = torch.export.export(self.model, (self.dummy_tensor,))
ov_model = ov.convert_model(exported_model, example_input=self.dummy_tensor.cpu(), input=self.input_size)
self.path_compressed_ir = self.output_model_dir / "model.xml"
ov.serialize(ov_model, self.path_compressed_ir)
Expand Down
132 changes: 122 additions & 10 deletions tests/post_training/pipelines/image_classification_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,27 @@
import copy
import os

os.environ["TORCHINDUCTOR_FREEZING"] = "1"

from itertools import islice

import numpy as np
import openvino as ov
import torch
from sklearn.metrics import accuracy_score
from torch.ao.quantization.quantize_pt2e import convert_pt2e
from torch.ao.quantization.quantize_pt2e import prepare_pt2e
from torch.ao.quantization.quantizer.x86_inductor_quantizer import X86InductorQuantizer
from torch.ao.quantization.quantizer.x86_inductor_quantizer import get_default_x86_inductor_quantization_config
from torchvision import datasets

import nncf
from nncf.common.logging.track_progress import track
from nncf.experimental.common.quantization.algorithms.quantizer.openvino_quantizer import OpenVINOQuantizer
from nncf.experimental.torch.fx.quantization.quantize_pt2e import quantize_pt2e
from tests.post_training.pipelines.base import DEFAULT_VAL_THREADS
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.base import PTQTestPipeline


Expand All @@ -33,18 +45,15 @@ def prepare_calibration_dataset(self):

self.calibration_dataset = nncf.Dataset(loader, self.get_transform_calibration_fn())

def _validate(self):
val_dataset = datasets.ImageFolder(root=self.data_dir / "imagenet" / "val", transform=self.transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=1, num_workers=2, shuffle=False)

dataset_size = len(val_loader)

# Initialize result tensors for async inference support.
predictions = np.zeros((dataset_size))
references = -1 * np.ones((dataset_size))
def _validate_ov(
self,
val_loader: torch.utils.data.DataLoader,
predictions: np.ndarray,
references: np.ndarray,
dataset_size: int,
):

core = ov.Core()

if os.environ.get("INFERENCE_NUM_THREADS"):
# Set CPU_THREADS_NUM for OpenVINO inference
inference_num_threads = os.environ.get("INFERENCE_NUM_THREADS")
Expand Down Expand Up @@ -73,8 +82,111 @@ def process_result(request, userdata):
references[i] = target

infer_queue.wait_all()
return predictions, references

def _validate_torch_compile(
    self, val_loader: torch.utils.data.DataLoader, predictions: np.ndarray, references: np.ndarray
):
    """
    Validate self.compressed_model by running it through torch.compile.

    X86 quantizer backends are compiled with the default torch.compile backend,
    every other backend handled here is compiled with the "openvino" backend.

    :param val_loader: Validation data loader yielding (images, target) pairs.
        Each batch must hold exactly one sample, because results are stored
        one scalar per loader index.
    :param predictions: Pre-allocated array, filled in place with the predicted class per batch.
    :param references: Pre-allocated array, filled in place with the target class per batch.
    :return: The same predictions and references arrays after they have been filled.
    """
    # Report how many quantize ops are present in the graph before compilation.
    q_num = sum(1 for node in self.compressed_model.graph.nodes if ".quantize_per" in str(node.target))
    print(f"Quantize ops num: {q_num}")

    if self.backend in [BackendType.X86_QUANTIZER_AO, BackendType.X86_QUANTIZER_NNCF]:
        compiled_model = torch.compile(self.compressed_model)
    else:
        compiled_model = torch.compile(self.compressed_model, backend="openvino")

    # Validation is inference only, so gradient tracking is switched off.
    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            pred = torch.argmax(compiled_model(images), dim=1)
            # Extract Python scalars explicitly instead of relying on the implicit
            # conversion of a one-element array to a scalar on assignment.
            predictions[i] = pred.item()
            references[i] = target.item()
    return predictions, references

def _validate(self):
    """
    Compute top-1 accuracy on the ImageNet validation folder and store it in self.run_info.

    The validation routine is selected from self.backend alone: backends listed
    in FX_BACKENDS go through torch.compile, all others through OpenVINO.
    """
    imagenet_val_dir = self.data_dir / "imagenet" / "val"
    loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(root=imagenet_val_dir, transform=self.transform),
        batch_size=1,
        num_workers=2,
        shuffle=False,
    )
    num_batches = len(loader)

    # Result buffers are allocated up front so that the validation routines
    # can fill them by index (needed for async inference support).
    predicted = np.zeros(num_batches)
    expected = np.full(num_batches, -1.0)

    if self.backend not in FX_BACKENDS:
        predicted, expected = self._validate_ov(loader, predicted, expected, num_batches)
    else:
        predicted, expected = self._validate_torch_compile(loader, predicted, expected)

    top1_accuracy = accuracy_score(predicted, expected)

    self.run_info.metric_name = "Acc@1"
    self.run_info.metric_value = top1_accuracy

def _compress_torch_ao(self, quantizer):
    """
    Quantize self.model with the torch.ao PT2E flow: prepare, calibrate, convert.

    :param quantizer: Quantizer instance passed to prepare_pt2e.
    """
    observed_model = prepare_pt2e(self.model, quantizer)

    # Calibrate on at most `subset_size` items (300 unless overridden).
    num_samples = self.compression_params.get("subset_size", 300)
    calibration_items = islice(self.calibration_dataset.get_inference_data(), num_samples)
    for sample in calibration_items:
        observed_model(sample)

    self.compressed_model = convert_pt2e(observed_model)

def _compress_nncf_pt2e(self, quantizer):
    """
    Quantize self.model with NNCF's quantize_pt2e and store the result in self.compressed_model.

    The compression parameters listed below are forwarded to quantize_pt2e as
    keyword arguments. They used to be collected but never passed on, so they
    were silently ignored.
    NOTE(review): the key names are assumed to match quantize_pt2e keyword
    parameters - confirm against the NNCF version in use.

    :param quantizer: Quantizer instance passed to quantize_pt2e.
    """
    forwarded_keys = (
        "subset_size",
        "fast_bias_correction",
        "smooth_quant",
        "bias_correction_params",
        "smooth_quant_params",
        "activations_range_estimator_params",
        "weights_range_estimator_params",
    )
    pt2e_kwargs = {key: self.compression_params[key] for key in forwarded_keys if key in self.compression_params}

    # Unless smooth_quant is given explicitly, enable it only for transformer models.
    # setdefault also prevents passing `smooth_quant` twice to quantize_pt2e.
    is_transformer = self.compression_params.get("model_type") == nncf.ModelType.TRANSFORMER
    pt2e_kwargs.setdefault("smooth_quant", is_transformer)

    self.compressed_model = quantize_pt2e(
        self.model, quantizer, self.calibration_dataset, fold_quantize=False, **pt2e_kwargs
    )

def _compress(self):
    """
    Quantize self.model.

    FX_TORCH and every backend outside FX_BACKENDS are delegated to the parent
    implementation. The remaining backends build a quantizer (OpenVINOQuantizer
    or X86InductorQuantizer) and run it through either the NNCF or the torch.ao
    PT2E flow, depending on the backend.
    """
    uses_pt2e_quantizer = self.backend in FX_BACKENDS and self.backend != BackendType.FX_TORCH
    if not uses_pt2e_quantizer:
        super()._compress()
        return

    ov_quantizer_backends = [BackendType.OV_QUANTIZER_AO, BackendType.OV_QUANTIZER_NNCF]
    if self.backend not in ov_quantizer_backends:
        quantizer = X86InductorQuantizer()
        quantizer.set_global(get_default_x86_inductor_quantization_config())
    else:
        # Only these compression parameters are handed to the OpenVINOQuantizer constructor.
        supported_names = (
            "mode",
            "preset",
            "target_device",
            "model_type",
            "ignored_scope",
            "overflow_fix",
            "quantize_outputs",
            "activations_quantization_params",
            "weights_quantization_params",
            "quantizer_propagation_rule",
        )
        quantizer_kwargs = {
            name: self.compression_params[name] for name in supported_names if name in self.compression_params
        }
        quantizer = OpenVINOQuantizer(**quantizer_kwargs)

    nncf_flow_backends = [BackendType.OV_QUANTIZER_NNCF, BackendType.X86_QUANTIZER_NNCF]
    if self.backend in nncf_flow_backends:
        self._compress_nncf_pt2e(quantizer)
    else:
        self._compress_torch_ao(quantizer)
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from torchvision import models

from nncf.torch import disable_patching
from tests.post_training.pipelines.base import FX_BACKENDS
from tests.post_training.pipelines.base import PT_BACKENDS
from tests.post_training.pipelines.base import BackendType
from tests.post_training.pipelines.image_classification_base import ImageClassificationBase
Expand Down Expand Up @@ -75,7 +76,7 @@ def prepare_model(self) -> None:
if self.batch_size > 1: # Dynamic batch_size shape export
self.input_size[0] = -1

if self.backend == BackendType.FX_TORCH:
if self.backend in FX_BACKENDS:
with torch.no_grad():
with disable_patching():
self.model = self.model_params.export_fn(model, (self.dummy_tensor,))
Expand Down Expand Up @@ -121,7 +122,7 @@ def _dump_model_fp32(self) -> None:
)
ov.serialize(ov_model, self.fp32_model_dir / "model_fp32.xml")

if self.backend == BackendType.FX_TORCH:
if self.backend in FX_BACKENDS:
exported_model = torch.export.export(self.model, (self.dummy_tensor,))
ov_model = ov.convert_model(exported_model, example_input=self.dummy_tensor, input=self.input_size)
ov.serialize(ov_model, self.fp32_model_dir / "fx_model_fp32.xml")
Expand All @@ -133,7 +134,7 @@ def prepare_preprocessor(self) -> None:
self.transform = self.model_params.weights.transforms()

def get_transform_calibration_fn(self):
if self.backend in [BackendType.FX_TORCH] + PT_BACKENDS:
if self.backend in FX_BACKENDS + PT_BACKENDS:
device = torch.device("cuda" if self.backend == BackendType.CUDA_TORCH else "cpu")

def transform_fn(data_item):
Expand Down
7 changes: 7 additions & 0 deletions tests/post_training/test_quantize_conformance.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ def fixture_run_benchmark_app(pytestconfig):
return pytestconfig.getoption("benchmark")


@pytest.fixture(scope="session", name="validate_in_backend")
def fixture_validate_in_backend(pytestconfig):
    """Expose the value of the `validate_in_backend` command line option as a session fixture."""
    option_value = pytestconfig.getoption("validate_in_backend")
    return option_value


@pytest.fixture(scope="session", name="extra_columns")
def fixture_extra_columns(pytestconfig):
    """Expose the value of the `extra_columns` command line option as a session fixture."""
    option_value = pytestconfig.getoption("extra_columns")
    return option_value
Expand Down Expand Up @@ -266,6 +271,7 @@ def test_ptq_quantization(
run_torch_cuda_backend: bool,
subset_size: Optional[int],
run_benchmark_app: bool,
validate_in_backend: bool,
capsys: pytest.CaptureFixture,
extra_columns: bool,
memory_monitor: bool,
Expand Down Expand Up @@ -293,6 +299,7 @@ def test_ptq_quantization(
"data_dir": data_dir,
"no_eval": no_eval,
"run_benchmark_app": run_benchmark_app,
"validate_in_backend": validate_in_backend,
"batch_size": batch_size,
"memory_monitor": memory_monitor,
}
Expand Down

0 comments on commit 9e90443

Please sign in to comment.