# Source code for archai.discrete_search.evaluators.onnx_model
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
from typing import Any, Dict, List, Optional, Tuple, Union
import onnxruntime as rt
import torch
from overrides import overrides
from archai.common.timing import MeasureBlockTime
from archai.discrete_search.api.archai_model import ArchaiModel
from archai.discrete_search.api.model_evaluator import ModelEvaluator
from archai.common.file_utils import TemporaryFiles
class AvgOnnxLatency(ModelEvaluator):
    """Evaluate the average ONNX latency (in seconds) of an architecture.

    The latency is measured by exporting the model to ONNX, running it on
    random inputs and averaging the latency over `num_trials` trials.
    """

    def __init__(
        self,
        input_shape: Union[Tuple[int, ...], List[Tuple[int, ...]]],
        num_trials: Optional[int] = 1,
        input_dtype: Optional[str] = "torch.FloatTensor",
        rand_range: Optional[Tuple[float, float]] = (0.0, 1.0),
        export_kwargs: Optional[Dict[str, Any]] = None,
        device: Optional[str] = "cpu",
        inf_session_kwargs: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Initialize the evaluator.

        Args:
            input_shape: Input shape(s) of the model. If a list of shapes is provided,
                the model is assumed to have multiple inputs.
            num_trials: Number of trials to run.
            input_dtype: Data type of the input (a `torch` tensor type name, e.g.
                ``"torch.FloatTensor"``).
            rand_range: Range of random values to use for the input.
            export_kwargs: Keyword arguments to pass to `torch.onnx.export`.
            device: Device used for ONNX inference, either ``"cpu"`` or ``"gpu"``.
            inf_session_kwargs: Keyword arguments to pass to `onnxruntime.InferenceSession`.

        """
        input_shapes = [input_shape] if isinstance(input_shape, tuple) else input_shape

        rand_min, rand_max = rand_range
        # Pre-generate one random sample per input; the same sample is reused
        # both for the ONNX export and for every latency trial.
        self.sample_input = tuple(
            ((rand_max - rand_min) * torch.rand(*shape) + rand_min).type(input_dtype)
            for shape in input_shapes
        )

        self.input_dtype = input_dtype
        self.rand_range = rand_range
        self.num_trials = num_trials
        self.export_kwargs = export_kwargs or dict()
        self.inf_session_kwargs = inf_session_kwargs or dict()
        self.device = device

    @overrides
    def evaluate(self, model: ArchaiModel, budget: Optional[float] = None) -> float:
        # The pre-generated sample inputs live on CPU, so the export is run
        # with the model on CPU regardless of the inference device.
        model.arch.to("cpu")

        # Exports model to ONNX. The benchmark must run inside this context so
        # the temporary file still exists when the inference session loads it.
        with TemporaryFiles() as tmp_file:
            onnx_file = tmp_file.get_temp_file()
            torch.onnx.export(
                model.arch,
                self.sample_input,
                onnx_file,
                input_names=[f"input_{i}" for i in range(len(self.sample_input))],
                **self.export_kwargs,
            )

            # Benchmarks ONNX model
            onnx_device = "CUDAExecutionProvider" if self.device == "gpu" else "CPUExecutionProvider"
            onnx_session = rt.InferenceSession(onnx_file, providers=[onnx_device], **self.inf_session_kwargs)
            sample_input = {f"input_{i}": inp.numpy() for i, inp in enumerate(self.sample_input)}

            inf_times = []
            for _ in range(self.num_trials):
                with MeasureBlockTime("onnx_inference") as t:
                    onnx_session.run(None, input_feed=sample_input)
                inf_times.append(t.elapsed)

        return sum(inf_times) / self.num_trials