Source code for archai.discrete_search.evaluators.pt_profiler_utils.pt_profiler_eval

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

import statistics
from typing import Any, Dict, List, Optional, Union

import torch

from archai.discrete_search.evaluators.pt_profiler_utils.pt_profiler_model import (
    ProfilerModel,
)


def profile(
    model: torch.nn.Module,
    forward_args: Optional[List[Any]] = None,
    forward_kwargs: Optional[Dict[str, Any]] = None,
    num_warmups: Optional[int] = 1,
    num_samples: Optional[int] = 1,
    use_cuda: Optional[bool] = False,
    use_median: Optional[bool] = False,
    ignore_layers: Optional[List[str]] = None,
) -> Dict[str, Union[float, int]]:
    """Profile a PyTorch model.

    Outputs FLOPs, MACs, number of parameters, latency and peak memory.

    Args:
        model: PyTorch model.
        forward_args: `model.forward()` arguments used for profiling.
        forward_kwargs: `model.forward()` keyword arguments used for profiling.
        num_warmups: Number of warmup runs before profiling.
        num_samples: Number of runs after warmup.
        use_cuda: Whether to use CUDA instead of CPU.
        use_median: Whether to use median instead of mean to average memory and latency.
        ignore_layers: List of layer names that should be ignored during profiling.

    Returns:
        FLOPs, MACs, number of parameters, latency (seconds) and peak memory (bytes).

    """

    assert isinstance(model, torch.nn.Module), "`model` must be a PyTorch model."

    # Normalize inputs: allow a bare tensor for `forward_args` and default to empty containers
    forward_args = forward_args if forward_args is not None else []
    forward_args = [forward_args] if isinstance(forward_args, torch.Tensor) else forward_args
    forward_kwargs = forward_kwargs or {}

    if use_cuda:
        # Ensures that model and all inputs are put on CUDA before profiling
        model.to("cuda")
        forward_args = tuple([arg.to("cuda") for arg in forward_args])
        forward_kwargs = {key: value.to("cuda") for key, value in forward_kwargs.items()}

    profiler = ProfilerModel(model)
    model.eval()

    # Warmup runs are not measured; they let caches, allocators and kernels stabilize
    for _ in range(num_warmups):
        with torch.no_grad():
            _ = model(*forward_args, **forward_kwargs)

    result = {"flops": None, "macs": None, "n_parameters": None, "latency": [], "peak_memory": []}

    for _ in range(num_samples):
        profiler.start(ignore_layers=ignore_layers)

        with torch.no_grad():
            _ = model(*forward_args, **forward_kwargs)

        # FLOPs, MACs and parameter counts are deterministic, so they are simply overwritten;
        # latency and peak memory vary per run and are collected for averaging below
        result.update(
            {"flops": profiler.get_flops(), "macs": profiler.get_macs(), "n_parameters": profiler.get_params()}
        )

        result["latency"].append(profiler.get_latency())
        result["peak_memory"].append(profiler.get_peak_memory())

        profiler.end()

    if use_cuda:
        # Ensures that model and all inputs are put on CPU after profiling to avoid
        # overloading GPU memory
        model.to("cpu")
        forward_args = tuple([arg.to("cpu") for arg in forward_args])
        forward_kwargs = {key: value.to("cpu") for key, value in forward_kwargs.items()}

    # Aggregate the per-run measurements with mean or median
    stat = statistics.median if use_median else statistics.mean
    result["latency"] = stat(result["latency"])
    result["peak_memory"] = stat(result["peak_memory"])

    return result
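

# Example usage (a minimal sketch, not part of the original module): profiling a
# small feed-forward model on a random input. The model architecture, input shape
# and parameter values below are illustrative assumptions, not values used by
# Archai itself.
if __name__ == "__main__":
    model = torch.nn.Sequential(
        torch.nn.Linear(64, 128),
        torch.nn.ReLU(),
        torch.nn.Linear(128, 10),
    )
    x = torch.randn(8, 64)

    # A single tensor would also be accepted for `forward_args`; it is wrapped
    # in a list internally. Using a few samples with `use_median=True` makes the
    # latency estimate more robust to outlier runs.
    stats = profile(model, forward_args=[x], num_warmups=2, num_samples=5, use_median=True)
    print(stats)  # dict with keys: flops, macs, n_parameters, latency, peak_memory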