# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
import bisect
import math
import os
import random
from enum import Enum
from typing import List, Optional, Tuple
import matplotlib.pyplot as plt
import numpy as np
import tensorwatch as tw
import yaml
from tensorwatch import ModelStats
from archai.common import utils
from archai.common.ordered_dict_logger import get_global_logger
from archai.supergraph.nas.model_desc import ModelDesc
from archai.supergraph.utils.metrics import Metrics
logger = get_global_logger()
class JobStage(Enum):
    """Lifecycle stage of a job in the search pipeline.

    Values are assigned consecutively so the following stage can be obtained
    simply by adding 1 to the current stage's value (see
    ``ConvexHullPoint.next_stage``).
    """

    SEED = 1
    SEED_TRAINED = 2
    SEARCH = 3
    SEARCH_TRAINED = 4
    EVAL = 5
    EVAL_TRAINED = 6
class ExperimentStage(Enum):
    """Top-level phase of an experiment: architecture search vs final evaluation."""

    SEARCH = 1
    EVAL = 2
class ConvexHullPoint:
    """A candidate model on (or near) the error-vs-compute convex hull.

    Bundles a model description with its macro parameters, training metrics
    and model statistics. Points are only recorded after training.
    """

    def __init__(self, job_stage:JobStage, parent_id:int,
                 sampling_count:int,
                 model_desc:ModelDesc,
                 cells_reductions_nodes:Tuple[int, int, int],
                 metrics:Optional[Metrics]=None,
                 model_stats:Optional[tw.ModelStats]=None) -> None:
        self.job_stage = job_stage
        self.parent_id = parent_id
        self.sampling_count = sampling_count
        self.model_desc = model_desc
        self.cells_reductions_nodes = cells_reductions_nodes
        self.metrics = metrics
        self.model_stats = model_stats

        # TODO: we use random IDs because with ray multiprocessing, its harder to have global
        # id generation. Ideally we should use UUID or global store but for now we just
        # use large enough random range
        ConvexHullPoint._id = random.randint(0, 2147483648)
        self.id = ConvexHullPoint._id

    def is_trained_stage(self)->bool:
        """Return True if this point finished seed or search training."""
        return self.job_stage in (JobStage.SEED_TRAINED, JobStage.SEARCH_TRAINED)

    def next_stage(self)->JobStage:
        """Return the stage following the current one (relies on sequential enum values)."""
        return JobStage(self.job_stage.value + 1)
def _is_on_ray_left(x1, y1, x2, y2, x3, y3, inclusive=False, epsilon=0):
"""
Return whether x3,y3 is on the left side of the ray x1,y1 -> x2,y2.
If inclusive, then the answer is left or on the ray.
If otherwise, then the answer is strictly left.
"""
val = (x2 - x1) * (y3 - y1) - (x3 - x1) * (y2 - y1)
if inclusive:
return val >= epsilon
return val > epsilon
def _convex_hull_from_points(xs, ys, eps=None, allow_increase=False):
    """
    Andrew's Monotone Chain Algorithm: (https://en.wikipedia.org/wiki/Graham_scan)

    Computes the lower convex hull of the points (xs[i], ys[i]).
    Assume the data are sorted in order of xs, then the computation complexity is O(n).
    If not sorted, then a sort by x-value is applied first. The complexity becomes O(nlog(n)).

    Args:
        xs: x-coordinates (e.g. model cost)
        ys: y-coordinates (e.g. error)
        eps: if given and > 0, also return the indices of points whose y is
            within a relative (1 + eps) band above the hull
        allow_increase: if False, a fake final point at (2 * x_max, y_min) is
            used so the hull never increases toward the right

    Return:
        hull_indices (list): indices for the points on the hull exactly
        eps_indices (list): indices for the points on the hull + eps tolerance
    """
    indices = list(range(len(xs)))

    if len(xs) <= 1:
        return indices, indices

    # sort by (x, y) unless xs is already monotone non-decreasing
    is_monotone = True
    for i in range(1, len(xs)):
        if xs[i] < xs[i-1]:
            is_monotone = False
            break
    if not is_monotone:
        indices.sort(key=lambda i : (xs[i], ys[i]))

    def _remove_non_hull_idx(x1, y1, idxs):
        # pop stack entries that stop being on the lower hull once (x1, y1)
        # is appended
        while len(idxs) > 1:
            x2, y2 = xs[idxs[-1]], ys[idxs[-1]]
            x3, y3 = xs[idxs[-2]], ys[idxs[-2]]
            if not _is_on_ray_left(x1, y1, x2, y2, x3, y3):
                if np.abs(x1 - x2) > 1e-6 or np.abs(y1 - y2) > 1e-6:
                    # this ensures that the no points are duplicates
                    break
            del idxs[-1]
        return idxs

    hull_indices = []
    min_y = float('inf')
    for idx in indices:
        x1, y1 = xs[idx], ys[idx]
        min_y = min(y1, min_y)
        hull_indices = _remove_non_hull_idx(x1, y1, hull_indices)
        hull_indices.append(idx)

    if not allow_increase:
        # use a fake final point at (2 * x_max , y_min) to remove increasing.
        x1, y1 = xs[indices[-1]] * 2, min_y
        hull_indices = _remove_non_hull_idx(x1, y1, hull_indices)

    # compute epsilon hull (convex hull + (1+eps) band)
    eps_indices = hull_indices
    if eps is not None and eps > 0:
        eps_indices = []
        h_idx = 0 # right idx, in the hull_indices
        for idx in indices:
            x = xs[idx]
            y = ys[idx]
            if h_idx >= len(hull_indices):
                # Larger than the largest model on the hull: use the hull's
                # right-most y as the reference height
                y_interp = ys[hull_indices[-1]]
            elif idx == hull_indices[h_idx]:
                # critical pts on hull
                y_interp = y
                x1, y1 = x, y # hull point to left
                h_idx += 1
                if h_idx < len(hull_indices):
                    x2, y2 = xs[hull_indices[h_idx]], ys[hull_indices[h_idx]]
                else:
                    x2, y2 = xs[indices[-1]] * 2, ys[hull_indices[-1]]
            else:
                # Between pts of hull: linearly interpolate the hull height at x.
                # NOTE: the first sorted point is always on the hull, so
                # x1, y1, x2, y2 are bound before this branch is reached.
                try:
                    y_interp = y1 + (y2 - y1) / (x2 - x1) * (x - x1)
                    if np.isnan(y_interp):
                        y_interp = min(y1, y2)
                except ZeroDivisionError:
                    # numerical issues when x2, x1 are close
                    # (was a bare `except:` which also swallowed unrelated errors)
                    y_interp = min(y1, y2)
            if y <= y_interp * (1. + eps):
                eps_indices.append(idx)
                assert x1 <= x and x2 >= x, "idx={} idx[h_idx-1]={} idx[h_idx]={} x={} y={} x1={} x2={} y1={} y2={} y_interp={}".format(\
                    idx, hull_indices[h_idx-1], hull_indices[h_idx], x, y, x1, x2, y1, y2, y_interp)

    return hull_indices, eps_indices
def _test_convex_hull():
    """Visual smoke test: plot a random point cloud together with its eps
    convex hull (shows an interactive window)."""
    np.random.seed(19921102)
    # random points; ys shifted so the cloud roughly follows a decreasing trend
    xs = np.random.uniform(size=100)
    ys = np.random.uniform(size=100) - xs + 1.0
    eps = np.random.uniform(low=0.0, high=0.3)

    # compute eps convex hull.
    hull_indices, indices = _convex_hull_from_points(xs, ys, eps=eps)

    # plot
    import matplotlib.pyplot as plt
    plt.close('all')

    hull_xs = [xs[i] for i in indices]
    hull_ys = [ys[i] for i in indices]
    bound_xs = [xs[i] for i in hull_indices]
    bound_ys = [ys[i] * (1 + eps) for i in hull_indices]

    plt.plot(bound_xs, bound_ys, c='red', label='eps-bound')
    plt.scatter(xs, ys, label='pts')
    plt.scatter(hull_xs, hull_ys, c='black', marker='+', label='eps-hull')
    plt.show()
def _convex_hull_insert(hull_xs, hull_ys, x, y, eps=0.0, eg_xs=None, eg_ys=None):
    """
    Insert a new point (x,y) to a lower convex hull defined by
    hull_xs and hull_ys.
    Assume hull_xs are sorted (increasing).

    Args:
        hull_xs: x-coordinates of the current hull, sorted increasing
        hull_ys: y-coordinates of the current hull (all must be >= 0)
        x, y: candidate point to insert
        eps: relative tolerance; a point up to (1 + eps) above the hull is
            still appended (eps-greedy)
        eg_xs, eg_ys: unused; kept for interface compatibility (previously
            dangerous mutable-list defaults, now None)

    returns:
        remove_slice (slice or None) : None if no insert. slice(left, right) if to
        remove the indices left:right
    """
    assert y >= 0, "eps greedy of convex hull need all y > 0"

    # right most point is a fake point (inf, min_y), so the curve is decreasing.
    # left most point is a fake point (0, inf), so that the cheapest is always kept.
    n_xs = len(hull_xs)
    if n_xs == 0:
        # always insert into an empty hull
        return slice(0, 0)

    min_y = np.min(hull_ys)
    idx = bisect.bisect_left(hull_xs, x)

    # determine the hull's height at x for comparison against y
    if idx == n_xs:
        # right most (also covers the n_xs == 1)
        hull_y = min_y
    elif hull_xs[idx] == x:
        hull_y = hull_ys[idx]
    elif idx == 0:
        # left most: anything cheaper than the cheapest point is attractive
        hull_y = y * 1.1
        if hull_y == 0:
            hull_y = 1.0
    else:
        # linear interpolate between the two neighboring hull points
        y1, y2 = hull_ys[idx-1:idx+1]
        x1, x2 = hull_xs[idx-1:idx+1]
        hull_y = y1 + (y2 - y1) / (x2 - x1) * (x - x1)

    diff = ( y - hull_y ) / hull_y
    if diff > eps:
        # do not insert
        return None
    if diff >= 0:
        # we insert b/c of eps-greedy. Or there are three points on the same line.
        return slice(n_xs, n_xs)

    # now diff < 0: point is strictly below the hull, so dominated neighbors
    # on both sides must be removed.
    # fix left side
    slice_left = idx
    for li in reversed(range(1, idx)):
        x3, x2 = hull_xs[li-1:li+1]
        y3, y2 = hull_ys[li-1:li+1]
        to_rm_li = _is_on_ray_left(x, y, x2, y2, x3, y3, inclusive=False)
        if to_rm_li:
            slice_left = li
        else:
            break

    # fix right side
    slice_right = idx
    min_y = min(y, min_y)
    for li in range(idx, n_xs):
        if li < n_xs - 1:
            x2, x3 = hull_xs[li:li+2]
            y2, y3 = hull_ys[li:li+2]
        else:
            # last point: pair it with the fake decreasing endpoint
            x2, x3 = hull_xs[li], hull_xs[li] * 2
            y2, y3 = hull_ys[li], min_y
        to_rm_li = not _is_on_ray_left(x, y, x2, y2, x3, y3, inclusive=True)
        if to_rm_li:
            slice_right = li + 1
        else:
            break

    # TODO epsilon greedy
    return slice(slice_left, slice_right)
def _test_convex_hull_insert():
    """Manual test for _convex_hull_insert: probe points around a convex
    performance curve (random or given via CLI) and save a debug plot."""
    import argparse

    import matplotlib.pyplot as plt
    plt.close('all')

    parser = argparse.ArgumentParser()
    parser.add_argument('--xs', type=str)
    parser.add_argument('--ys', type=str)
    parser.add_argument('-x', type=float)
    parser.add_argument('-y', type=float)
    args = parser.parse_args()

    y_max, y_min = 1.0, 0.01

    def _random_convex_perf_curve(n):
        # build a random decreasing convex curve starting at (1, y_max)
        slopes = np.random.uniform(low=-3, high=-1e-2, size=n-1)
        slopes.sort()
        delta_ys = np.random.uniform(size=n-1)
        delta_ys = delta_ys / np.sum(delta_ys) * (-y_max + y_min)
        delta_xs = delta_ys / slopes
        xs = [1] + list(np.cumsum(delta_xs) + 1)
        ys = [y_max] + list(np.cumsum(delta_ys) + y_max)
        return xs, ys

    if args.xs and args.ys:
        xs = [float(t) for t in args.xs.split(',')]
        ys = [float(t) for t in args.ys.split(',')]
    else:
        xs, ys = _random_convex_perf_curve(8)
    print(xs)
    print(ys)

    plt.plot(xs + [xs[-1] * 2.0], ys + [y_min], color='r', marker='x')

    n = len(xs)
    scater_xs, scater_ys = [], []

    if args.x and args.y:
        scater_xs.append(args.x)
        scater_ys.append(args.y)

    # probe midpoints at a few positions along the curve plus one past the end
    for i in [0, 1, n//2, n-2, n-1]:
        if i < n-1:
            px = (xs[i] + xs[i+1]) / 2.
            py = max(y_min / 2, min(y_max, (1 + np.random.uniform(-0.9, -0.3)) * (ys[i] + ys[i+1]) / 2.))
        else:
            px = xs[i] * 1.5
            py = max(y_min / 2, min(y_max, (1 + np.random.uniform(-0.9, -0.3)) * ys[i]))
        scater_xs.append(px)
        scater_ys.append(py)

    # one very low-error point beyond the right end of the curve
    scater_xs.append((2 * xs[-1] + xs[-2]) / 3)
    scater_ys.append(y_min / 2)

    for px, py in zip(scater_xs, scater_ys):
        ret = _convex_hull_insert(xs, ys, px, py)
        print("x={} y={} ret={}".format(px, py, ret))

    plt.scatter(scater_xs, scater_ys)
    plt.savefig(os.path.join('./temp', 'debug', 'convex_hull_insert.png'),
                dpi=plt.gcf().dpi, bbox_inches='tight')
def model_descs_on_front(hull_points:List[ConvexHullPoint], convex_hull_eps:float,
                         stage:ExperimentStage, lower_hull:bool=True)\
        ->Tuple[List[ConvexHullPoint], List[ConvexHullPoint], List[float], List[float]]:
    """Compute the (eps-)convex hull of the given points in the MAdds-vs-error plane.

    Returns (front_points, eps_points, xs, ys): points exactly on the hull,
    points within the eps band, and the raw x/y coordinates used (xs are
    MAdds, ys are 1 - top1 when lower_hull else top1).
    """
    assert(len(hull_points) > 0)

    top1_list = get_top1_for_stage(hull_points, stage)

    xs = [point.model_stats.MAdd for point in hull_points]
    if lower_hull:
        ys = [1.0 - top1 for top1 in top1_list]
    else:
        ys = [top1 for top1 in top1_list]

    hull_indices, eps_indices = _convex_hull_from_points(xs, ys, eps=convex_hull_eps)
    front_points = [hull_points[i] for i in hull_indices]
    eps_points = [hull_points[i] for i in eps_indices]

    return front_points, eps_points, xs, ys
def hull_points2tsv(points:List[ConvexHullPoint])->str:
    """Serialize hull points into a TSV string: header row plus one row per point.

    Columns cover the point's id and stage, macro architecture
    (cells/reductions/nodes), model statistics, and the best train/val/test
    metrics; missing val/test metrics are written as NaN.
    """
    lines = ['\t'.join(['id', 'job_stage',
                        'cells', 'reductions', 'nodes',
                        'MAdd', 'flops', 'duration',
                        'mread', 'mwrite',
                        'inference_memory', 'parameters',
                        'train_best_epoch', 'train_best_top1',
                        'val_best_epoch', 'val_best_top1',
                        'test_best_epoch', 'test_best_top1',
                        'parent_id', 'sampling_count'])]  # fixed header typo: was 'paent_id'

    for p in points:
        cells, reductions, nodes = p.cells_reductions_nodes
        mstats, metrics = p.model_stats, p.metrics

        vals = []
        vals.extend([p.id, JobStage(p.job_stage).name])
        # add macro
        vals.extend([cells, reductions, nodes])
        # add model stats
        vals.extend([mstats.MAdd, mstats.Flops, mstats.duration])
        vals.extend([mstats.mread, mstats.mwrite])
        vals.extend([mstats.inference_memory, mstats.parameters])
        # add metrics
        train_metrics, val_metrics, test_metrics = metrics.run_metrics.best_epoch()
        vals.extend([train_metrics.index, train_metrics.top1.avg])
        if val_metrics:
            vals.extend([val_metrics.index, val_metrics.top1.avg])
        else:
            vals.extend([math.nan, math.nan])
        if test_metrics:
            vals.extend([test_metrics.index, test_metrics.top1.avg])
        else:
            vals.extend([math.nan, math.nan])
        # other attributes
        vals.extend([p.parent_id, p.sampling_count])

        line = '\t'.join([str(v) for v in vals])
        lines.append(line)

    return '\n'.join(lines)
def sample_from_hull(hull_points:List[ConvexHullPoint], convex_hull_eps:float,
                     stage:ExperimentStage=ExperimentStage.SEARCH)->ConvexHullPoint:
    """Randomly pick a point from the eps-convex-hull front.

    The sampling distribution favors points that were sampled fewer times
    (non-maxima suppression) and points with fewer MAdds (cheaper to train),
    so each front point receives roughly equal compute over time.
    Increments the chosen point's sampling_count as a side effect.
    """
    front_points, eps_points, xs, ys = model_descs_on_front(hull_points,
                                                            convex_hull_eps, stage)

    logger.info(f'num models in pool: {len(hull_points)}')
    logger.info(f'num models on front: {len(front_points)}')
    logger.info(f'num models on front with eps: {len(eps_points)}')

    def _inverse_scores(values):
        # scale values into [0,1] to avoid numerical issues, then score
        # inversely: smaller value -> higher score
        v_min, v_max = min(values), max(values)
        v_range = (v_max - v_min) if v_max != v_min else v_max
        if v_range == 0:
            # to prevent division by 0
            v_range = 1
        return [1.0 / ((v - v_min) / v_range + 1) for v in values]

    # suppress models that have already been sampled many times
    count_scores = _inverse_scores([point.sampling_count for point in eps_points])
    # prefer smaller models: it takes less compute to train them, which lets
    # us evaluate each point an equal number of times within any budget
    madd_scores = _inverse_scores([point.model_stats.MAdd for point in eps_points])

    overall_scores = np.array(count_scores) + np.array(madd_scores)
    overall_scores = overall_scores / np.sum(overall_scores)

    sampled_point = np.random.choice(eps_points, p=overall_scores)
    sampled_point.sampling_count += 1

    return sampled_point
def get_top1_for_stage(hull_points:List[ConvexHullPoint], stage:ExperimentStage)->List[float]:
    '''Return top1 accuracy according to the experiment stage (SEARCH or EVAL)'''
    # during search we select on validation accuracy; during eval we use test accuracy
    if stage == ExperimentStage.SEARCH:
        return [p.metrics.best_val_top1() for p in hull_points]
    return [p.metrics.best_test_top1() for p in hull_points]
def plot_frontier(hull_points:List[ConvexHullPoint], convex_hull_eps:float,
                  expdir:str, stage:ExperimentStage=ExperimentStage.SEARCH)->None:
    """Save a plot of the convex hull frontier (MAdds vs top1 error) to
    expdir/convex_hull.png to aid debugging."""
    front_points, eps_points, xs, ys = model_descs_on_front(hull_points,
                                                            convex_hull_eps, stage)

    front_top1 = get_top1_for_stage(front_points, stage)
    eps_top1 = get_top1_for_stage(eps_points, stage)

    # hull points and the eps bound in the (MAdd, top1-error) plane
    hull_xs = [p.model_stats.MAdd for p in eps_points]
    hull_ys = [1.0 - top1 for top1 in eps_top1]
    bound_xs = [p.model_stats.MAdd for p in front_points]
    bound_ys = [(1.0 - top1) * (1 + convex_hull_eps) for top1 in front_top1]

    # for easier interpretation report everything in million increments
    xs_millions = [x / 1e6 for x in xs]
    hull_xs_millions = [x / 1e6 for x in hull_xs]
    bound_xs_millions = [x / 1e6 for x in bound_xs]

    plt.clf()
    plt.plot(bound_xs_millions, bound_ys, c='red', label='eps-bound')
    plt.scatter(xs_millions, ys, label='pts')
    plt.scatter(hull_xs_millions, hull_ys, c='black', marker='+', label='eps-hull')
    plt.xlabel('Multiply-Additions (Millions)')
    plt.ylabel('Top1 Error')
    plt.savefig(os.path.join(expdir, 'convex_hull.png'),
                dpi=plt.gcf().dpi, bbox_inches='tight')
def plot_pool(hull_points:List[ConvexHullPoint], expdir:str,
              stage:ExperimentStage=ExperimentStage.SEARCH)->None:
    """Save accuracy-vs-cost scatter plots (MAdds, Flops, params) for the
    entire model pool under expdir."""
    assert(len(hull_points) > 0)

    ys = get_top1_for_stage(hull_points, stage)

    # for easier interpretation report everything in million increments
    xs_madd_m = [p.model_stats.MAdd / 1e6 for p in hull_points]
    xs_flops_m = [p.model_stats.Flops / 1e6 for p in hull_points]
    xs_params_m = [p.model_stats.parameters / 1e6 for p in hull_points]

    def _scatter_save(xs_m, xlabel, filename):
        # one scatter per cost measure; accuracy always on the y axis
        plt.clf()
        plt.scatter(xs_m, ys)
        plt.xlabel(xlabel)
        plt.ylabel('Top1 Accuracy')
        plt.savefig(os.path.join(expdir, filename),
                    dpi=plt.gcf().dpi, bbox_inches='tight')

    _scatter_save(xs_madd_m, 'Multiply-Additions (Millions)',
                  'model_gallery_accuracy_madds.png')
    _scatter_save(xs_flops_m, 'Flops (Millions)',
                  'model_gallery_accuracy_flops.png')
    _scatter_save(xs_params_m, 'Params (Millions)',
                  'model_gallery_accuracy_params.png')
def plot_seed_model_stats(seed_model_stats:List[ModelStats], expdir:str)->None:
    """Save a 1-D scatter (all y=0) of the seed models' MAdds to
    expdir/seed_models_madds.png."""
    # report MAdds in millions for readability
    xs_madd_m = [stats.MAdd / 1e6 for stats in seed_model_stats]
    ys_zero = [0] * len(xs_madd_m)

    plt.clf()
    plt.scatter(xs_madd_m, ys_zero)
    plt.xlabel('Multiply-Additions (Millions)')
    plt.savefig(os.path.join(expdir, 'seed_models_madds.png'),
                dpi=plt.gcf().dpi, bbox_inches='tight')
def save_hull_frontier(hull_points:List[ConvexHullPoint], convex_hull_eps:float,
                       final_desc_foldername:str, expdir:str, stage:ExperimentStage=ExperimentStage.SEARCH)->ConvexHullPoint:
    """Persist the hull frontier.

    Writes one model-desc yaml and one hull-point yaml per eps-front point
    into final_desc_foldername, plus TSV summaries and raw x/y values under
    expdir. Returns the last eps-front point as the best performing model.
    """
    # make folder to save gallery of models after search
    final_desc_dir = utils.full_path(final_desc_foldername, create=True)

    # save the front on hull
    front_points, eps_points, xs, ys = model_descs_on_front(hull_points,
                                                            convex_hull_eps,
                                                            stage)

    for i, eps_point in enumerate(eps_points):
        # save readable model desc yaml
        eps_point.model_desc.save(os.path.join(final_desc_dir, f'model_desc_{i}.yaml'))
        # save hull point
        eps_point.model_desc.clear_trainables() # make file lightweight
        utils.write_string(os.path.join(final_desc_dir, f'hull_{i}.yaml'),
                           yaml.dump(eps_point))

    utils.write_string(os.path.join(expdir, 'pareto_front_summary.tsv'),
                       hull_points2tsv(front_points))
    utils.write_string(os.path.join(expdir, 'pareto_eps_summary.tsv'),
                       hull_points2tsv(eps_points))

    xy_lines = [str(x) + '\t' + str(y) for x, y in utils.zip_eq(xs, ys)]
    utils.write_string(os.path.join(expdir, 'pareto_xy.tsv'), '\n'.join(xy_lines))

    # return last model as best performing
    return eps_points[-1]
def save_hull(hull_points:List[ConvexHullPoint], expdir:str)->None:
    """Write the entire model pool as TSV to expdir/full_pool.tsv."""
    utils.write_string(os.path.join(expdir, 'full_pool.tsv'),
                       hull_points2tsv(hull_points))