"""Source code for archai.common.ml_perf_utils."""
import gc
import os
from typing import Callable, Tuple

import psutil
import torch
from torch import nn, profiler

def model_memory(create_model: Callable[[], nn.Module]) -> Tuple[nn.Module, int]:
    """Return the created model and the process memory (in bytes) it occupies."""
    gc.collect()
    # Baseline process memory before the model is created.
    process = psutil.Process(os.getpid())
    baseline_mem = process.memory_info().rss
    model = create_model()
    gc.collect()
    new_mem = process.memory_info().rss
    return model, new_mem - baseline_mem
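
# Usage sketch (an assumption, not part of the module): measure the extra
# process RSS taken up by a freshly constructed model. The model must be
# built inside the callable so allocation happens after the baseline read;
# note the RSS delta is only approximate, since the allocator may reuse
# already-mapped pages.
#
#   model, mem_bytes = model_memory(lambda: nn.Linear(1024, 1024))
#   print(f'model occupies ~{mem_bytes / 1024 ** 2:.1f} MiB of process memory')
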
def inference_stats(model: nn.Module, **inputs) -> Tuple[int, int, int, int]:
    """Return self memory usage (bytes), self CPU time (us), FLOPs, and
    end-to-end inference CPU time (us) for one forward pass.

    We sum the "self" time of individual ops, i.e., excluding child time.
    PyTorch's record_function reports a higher CPU time, probably because
    it includes time spent outside of ops. The profiler sometimes also
    generates a "[memory]" node whose memory value is negative.
    """
    with torch.no_grad():
        with profiler.profile(activities=[profiler.ProfilerActivity.CPU],
                              profile_memory=True, record_shapes=True,
                              with_flops=True) as prof:
            with profiler.record_function('model_inference'):
                _ = model(**inputs)

    t = prof.key_averages()
    self_time, self_mem, flops, ti_memory, inf_cpu, inf_mem, inf_flops = 0, 0, 0, 0, 0, 0, 0
    for ti in t:
        if ti.key == '[memory]':
            ti_memory = -ti.self_cpu_memory_usage
            continue
        if ti.key == 'model_inference':
            # Stats for the record_function block wrapping the whole forward pass.
            inf_mem = -ti.cpu_memory_usage
            inf_cpu = ti.cpu_time_total
            inf_flops = ti.flops
            continue
        # Accumulate per-op "self" stats, excluding the synthetic nodes above.
        self_mem += ti.self_cpu_memory_usage
        self_time += ti.self_cpu_time_total
        flops += ti.flops
    # ti_memory, inf_mem and inf_flops are collected above but not returned.
    return self_mem, self_time, flops, inf_cpu
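
# Usage sketch (an assumption, not part of the module): profile a single
# forward pass of a small model. Keyword arguments are forwarded to the
# model's forward method via **inputs, so `input` below is the parameter
# name of nn.Linear.forward.
#
#   model = nn.Linear(128, 10).eval()
#   mem, cpu_us, flops, inf_us = inference_stats(model, input=torch.rand(32, 128))
#   print(f'{mem} B self memory, {cpu_us} us self CPU, {flops} FLOPs, {inf_us} us total')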