# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import logging
from dataclasses import dataclass
from typing import List, Optional, Union
from olive.common.utils import StrEnumBase
from olive.hardware.constants import DEVICE_TO_EXECUTION_PROVIDERS
logger = logging.getLogger(__name__)
class Device(StrEnumBase):
    CPU = "cpu"
    CPU_SPR = "cpu_spr"
    GPU = "gpu"
    NPU = "npu"
    VPU = "vpu"
    INTEL_MYRIAD = "intel_myriad"


# memory unit suffix -> multiplier in bytes (decimal SI units)
MEM_TO_INT = {"KB": 1e3, "MB": 1e6, "GB": 1e9, "TB": 1e12}
@dataclass(frozen=True, eq=True)
class AcceleratorSpec:
    """Accelerator specification for a hardware device that can be used to optimize or evaluate a model."""

    accelerator_type: Union[str, Device]
    execution_provider: Optional[str] = None
    memory: Optional[int] = None

    def __str__(self) -> str:
        str_rep = str(self.accelerator_type).lower()
        if self.execution_provider:
            # drop the trailing "ExecutionProvider" suffix (17 characters)
            str_rep += f"-{self.execution_provider[:-17].lower()}"
        if self.memory:
            str_rep += f"-memory={self.memory}"
        return str_rep
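    # String-form sketch (values follow the fields above):
    #   str(AcceleratorSpec(Device.GPU, "CUDAExecutionProvider")) -> "gpu-cuda"
    #   str(AcceleratorSpec(Device.CPU, "CPUExecutionProvider", memory=16_000_000_000))
    #     -> "cpu-cpu-memory=16000000000"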
    def to_json(self):
        json_data = {"accelerator_type": str(self.accelerator_type)}
        if self.execution_provider:
            json_data["execution_provider"] = self.execution_provider
        if self.memory is not None:
            json_data["memory"] = self.memory
        return json_data
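    # For example, DEFAULT_CPU_ACCELERATOR (defined below) serializes as:
    #   {"accelerator_type": "cpu", "execution_provider": "CPUExecutionProvider"}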
    @staticmethod
    def str_to_int_memory(v: Union[int, str]) -> int:
        """Convert a memory value such as "4GB" or "512" to an integer byte count."""
        if not isinstance(v, str):
            return v
        if v.isdigit():
            return int(v)
        v = v.upper()
        if v[-2:] not in MEM_TO_INT:
            raise ValueError(f"Memory unit {v[-2:]} is not supported. Supported units are {list(MEM_TO_INT)}")
        return int(v[:-2]) * int(MEM_TO_INT[v[-2:]])
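    # Usage sketch (decimal units, per MEM_TO_INT above):
    #   AcceleratorSpec.str_to_int_memory("512")  -> 512
    #   AcceleratorSpec.str_to_int_memory("16kb") -> 16000
    #   AcceleratorSpec.str_to_int_memory("4GB")  -> 4000000000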
DEFAULT_CPU_ACCELERATOR = AcceleratorSpec(accelerator_type=Device.CPU, execution_provider="CPUExecutionProvider")
DEFAULT_GPU_CUDA_ACCELERATOR = AcceleratorSpec(accelerator_type=Device.GPU, execution_provider="CUDAExecutionProvider")
DEFAULT_GPU_TRT_ACCELERATOR = AcceleratorSpec(
accelerator_type=Device.GPU, execution_provider="TensorrtExecutionProvider"
)
class AcceleratorLookup:
    @staticmethod
    def get_managed_supported_execution_providers(device: Device):
        # CPUExecutionProvider is always appended since every ORT EP can fall back to CPU
        return [*DEVICE_TO_EXECUTION_PROVIDERS.get(device, []), "CPUExecutionProvider"]
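    # Sketch, assuming DEVICE_TO_EXECUTION_PROVIDERS maps Device.GPU to the CUDA and
    # TensorRT EPs (as the docstring examples below suggest):
    #   get_managed_supported_execution_providers(Device.GPU)
    #   -> ["CUDAExecutionProvider", "TensorrtExecutionProvider", ..., "CPUExecutionProvider"]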
    @staticmethod
    def get_execution_providers_for_device(device: Device):
        import onnxruntime

        return AcceleratorLookup.get_execution_providers_for_device_by_available_providers(
            device, onnxruntime.get_available_providers()
        )

    @staticmethod
    def get_execution_providers_for_device_by_available_providers(device: Device, available_providers):
        eps_per_device = AcceleratorLookup.get_managed_supported_execution_providers(device)
        return AcceleratorLookup.get_execution_providers(eps_per_device, available_providers)

    @staticmethod
    def get_execution_providers(execution_providers, available_providers):
        # fall back to all available providers if none of the requested ones are available
        eps = AcceleratorLookup.filter_execution_providers(execution_providers, available_providers)
        return eps or available_providers

    @staticmethod
    def filter_execution_providers(execution_providers, available_providers):
        if not execution_providers:
            return execution_providers
        assert isinstance(execution_providers, list)
        assert isinstance(available_providers, list)
        # preserve the ordering of available_providers
        return [ep for ep in available_providers if ep in execution_providers]
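    # Filtering keeps the order of available_providers, e.g. (a sketch):
    #   filter_execution_providers(
    #       ["TensorrtExecutionProvider", "CUDAExecutionProvider"],
    #       ["CUDAExecutionProvider", "CPUExecutionProvider"],
    #   ) -> ["CUDAExecutionProvider"]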
    @staticmethod
    def infer_devices_from_execution_providers(execution_providers: List[str]):
        """Infer the devices from the execution provider names.

        If every execution provider is uniquely mapped to a device, return the device list.
        Otherwise, return None.
        Note that CPUExecutionProvider is skipped during device inference; only the other ORT EPs are considered.
        For example:
            execution_providers = ["CPUExecutionProvider", "CUDAExecutionProvider"]
            returns ["gpu"]
            execution_providers = ["CUDAExecutionProvider", "TensorrtExecutionProvider"]
            returns ["gpu"]
        """
        if not execution_providers:
            return None

        ep_to_devices = {}
        for ep in execution_providers:
            if ep == "CPUExecutionProvider":
                # cannot infer a device for CPUExecutionProvider since all ORT EPs support CPU
                continue
            inferred_devices = []
            for device, eps in DEVICE_TO_EXECUTION_PROVIDERS.items():
                if ep in eps:
                    inferred_devices.append(device)
            ep_to_devices[ep] = inferred_devices or None
        mapped_devices = []
        for ep, inferred_device in ep_to_devices.items():
            if inferred_device is None:
                logger.warning(
                    "Execution provider %s cannot be mapped to any device. "
                    "Olive cannot infer the device, which may cause unexpected behavior. "
                    "Please specify the accelerator in the accelerator configs.",
                    ep,
                )
                return None
            elif len(inferred_device) > 1:
                logger.warning(
                    "Execution provider %s is mapped to multiple devices %s. "
                    "Olive cannot infer the device, which may cause unexpected behavior. "
                    "Please specify the accelerator in the accelerator configs.",
                    ep,
                    inferred_device,
                )
                return None
            elif inferred_device[0] not in mapped_devices:
                mapped_devices.append(inferred_device[0])
        return mapped_devices if mapped_devices else None
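    # Inference sketch (assuming DEVICE_TO_EXECUTION_PROVIDERS maps "gpu" to the CUDA and
    # TensorRT EPs, per the docstring examples):
    #   infer_devices_from_execution_providers(["CUDAExecutionProvider"]) -> ["gpu"]
    #   infer_devices_from_execution_providers(["CPUExecutionProvider"])  -> None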
    @staticmethod
    def infer_single_device_from_execution_providers(execution_providers: List[str]) -> Optional[str]:
        if not execution_providers:
            return None

        if execution_providers == ["CPUExecutionProvider"]:
            inferred_devices = ["cpu"]
        else:
            inferred_devices = AcceleratorLookup.infer_devices_from_execution_providers(execution_providers)

        assert inferred_devices, (
            f"Cannot infer the devices from the execution providers {execution_providers}."
            " Please specify the device in the accelerator configs."
        )
        assert len(inferred_devices) == 1, (
            f"Cannot infer the devices from the execution providers {execution_providers}. "
            f"Multiple devices are inferred: {inferred_devices}."
            " Please specify the device in the accelerator configs."
        )
        return inferred_devices[0]
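

# A minimal usage sketch (assumes onnxruntime is installed; the available providers,
# and therefore the printed lists, vary by environment and ORT build):
if __name__ == "__main__":
    spec = AcceleratorSpec(accelerator_type=Device.GPU, execution_provider="CUDAExecutionProvider")
    print(spec)  # gpu-cuda
    print(spec.to_json())
    print(AcceleratorSpec.str_to_int_memory("4GB"))  # 4000000000
    print(AcceleratorLookup.get_execution_providers_for_device(Device.GPU))
    print(AcceleratorLookup.infer_single_device_from_execution_providers(["CUDAExecutionProvider"]))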