Source code for archai.common.ml_perf_utils

from typing import Callable, Tuple
import psutil
import os
import tracemalloc
import torch
from torch import profiler
from torch import nn
import gc

def model_memory(create_model: Callable[[], nn.Module]) -> Tuple[nn.Module, int]:
    # Returns the model and the process memory it occupies, in bytes.
    gc.collect()
    # baseline process memory (RSS) before the model is created
    process = psutil.Process(os.getpid())
    baseline_mem = process.memory_info().rss
    model = create_model()
    gc.collect()
    new_mem = process.memory_info().rss
    return model, new_mem - baseline_mem
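
A minimal usage sketch (the lambda and the layer sizes are illustrative, not part of the module). Note that RSS is only an approximate measure: allocator caching and shared pages can skew the delta. Passing a factory rather than an already-constructed model lets the baseline be sampled before any weights are allocated.

from torch import nn

from archai.common.ml_perf_utils import model_memory

# The factory is called only after the baseline RSS has been recorded.
model, mem_bytes = model_memory(
    lambda: nn.Sequential(nn.Linear(256, 512), nn.ReLU(), nn.Linear(512, 10)))
print(f'model occupies ~{mem_bytes / 1024:.1f} KiB of process memory')
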
def inference_stats(model: nn.Module, **inputs) -> Tuple[int, int, int, int]:
    # Returns self memory usage in bytes, self CPU time in us, FLOPs,
    # and total inference CPU time in us.
    # We basically sum the "self" time of individual ops,
    # i.e., not including child time.
    # PyTorch also has record_function, which gives a higher CPU time,
    # probably because it includes time spent outside of ops.
    # Sometimes the profiler also generates a [memory] node
    # which has a negative memory value.
    with torch.no_grad():
        with profiler.profile(activities=[profiler.ProfilerActivity.CPU],
                              profile_memory=True, record_shapes=True,
                              with_flops=True) as prof:
            with profiler.record_function('model_inference'):
                _ = model(**inputs)

    t = prof.key_averages()
    self_time, self_mem, flops, ti_memory, inf_cpu, inf_mem, inf_flops = 0, 0, 0, 0, 0, 0, 0
    for ti in t:
        if ti.key == '[memory]':
            ti_memory = -ti.self_cpu_memory_usage
            continue
        if ti.key == 'model_inference':
            inf_mem = -ti.cpu_memory_usage
            inf_cpu = ti.cpu_time_total
            inf_flops = ti.flops
            continue
        self_mem += ti.self_cpu_memory_usage
        self_time += ti.self_cpu_time_total
        flops += ti.flops
    # ti_memory, inf_mem and inf_flops are collected but not returned.
    return self_mem, self_time, flops, inf_cpu
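
A hedged usage sketch (the nn.Linear model, input shape, and keyword name are illustrative assumptions). Inputs are forwarded as keyword arguments, so their names must match the model's forward() signature; for nn.Linear that parameter is named 'input'.

import torch
from torch import nn

from archai.common.ml_perf_utils import inference_stats

model = nn.Linear(128, 64).eval()
x = torch.randn(1, 128)

# Keyword name must match the model's forward() parameter.
self_mem, self_time, flops, inf_cpu = inference_stats(model, input=x)
print(f'self memory: {self_mem} B, self CPU time: {self_time} us, '
      f'FLOPs: {flops}, total inference CPU time: {inf_cpu} us')
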