Source code for archai.supergraph.algos.divnas.analyse_activations

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import math as ma
import os
from collections import defaultdict
from copy import deepcopy
from itertools import combinations
from typing import Any, Callable, Dict, List, Set, Tuple

import h5py
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

from archai.supergraph.algos.divnas.seqopt import SeqOpt
from archai.supergraph.utils.heatmap import heatmap


[docs]def create_submod_f(covariance:np.array)->Callable: def compute_marginal_gain_func(item:int, sub_sel:List[int], S:Set[int]): assert covariance.shape[0] == covariance.shape[1] assert len(covariance.shape) == 2 assert len(S) == covariance.shape[0] sel_set = set(sub_sel) marg_gain = compute_marginal_gain(item, sel_set, S, covariance) return marg_gain return compute_marginal_gain_func
[docs]def get_batch(feature_list, batch_size, i): start_row = batch_size * i end_row = start_row + batch_size feats = [feat[start_row:end_row, :] for feat in feature_list] return feats
[docs]def rbf(x:np.array, y:np.array, sigma=0.1)->np.array: """ Computes the rbf kernel between two input vectors """ # make sure that inputs are vectors assert len(x.shape) == 1 assert len(y.shape) == 1 sq_euclidean = np.sum(np.square(x-y)) k = np.exp(-sq_euclidean/(2*sigma*sigma)) return k
def _compute_mi(cov_kernel:np.array, A:Set, V_minus_A:Set): sigma_A = cov_kernel[np.ix_(list(A), list(A))] sigma_V_minus_A = cov_kernel[np.ix_(list(V_minus_A), list(V_minus_A))] I = 0.5 * np.log(np.linalg.det(sigma_A) * np.linalg.det(sigma_V_minus_A) / np.linalg.det(cov_kernel)) return I
[docs]def compute_brute_force_sol(cov_kernel:np.array, budget:int)->Tuple[Tuple[Any], float]: assert cov_kernel.shape[0] == cov_kernel.shape[1] assert len(cov_kernel.shape) == 2 assert budget > 0 and budget <= cov_kernel.shape[0] V = set(range(cov_kernel.shape[0])) # for each combination of budgeted items compute its mutual # information with the complement set mis = [] for subset in combinations(range(cov_kernel.shape[0]), budget): A = set(subset) V_minus_A = V - A I = _compute_mi(cov_kernel, A, V_minus_A) mis.append((subset, I)) # find the maximum subset max_subset, mi = max(mis, key = lambda x: x[1]) return max_subset, mi
[docs]def compute_correlation(covariance:np.array)->np.array: variance = np.diag(covariance).reshape(-1, 1) stds = np.sqrt(np.matmul(variance, variance.T)) correlation = covariance / (stds + 1e-16) return correlation
[docs]def compute_covariance_offline(feature_list:List[np.array])->np.array: """Compute covariance matrix for high-dimensional features. feature_shape: (num_samples, feature_dim) """ num_features = len(feature_list) num_samples = feature_list[0].shape[0] flatten_features = [ feas.reshape(num_samples, -1) for feas in feature_list] unbiased_features = [ feas - np.mean(feas, 0) for feas in flatten_features] # (num_samples, feature_dim, num_features) features = np.stack(unbiased_features, -1) covariance = np.zeros((num_features, num_features), np.float32) for i in range(num_samples): covariance += np.matmul(features[i].T, features[i]) return covariance
[docs]def compute_rbf_kernel_covariance(feature_list:List[np.array], sigma=0.1)->np.array: """ Compute rbf kernel covariance for high dimensional features. feature_list: List of features each of shape: (num_samples, feature_dim) sigma: sigma of the rbf kernel """ num_features = len(feature_list) covariance = np.zeros((num_features, num_features), np.float32) for i in range(num_features): for j in range(num_features): if i == j: covariance[i][j] = covariance[j][i] = 1.0 continue # NOTE: one could try to take all pairs rbf responses # but that is too much computation and probably does # not add much information feats_i = feature_list[i] feats_j = feature_list[j] assert feats_i.shape == feats_j.shape rbfs = np.exp(-np.sum(np.square(feats_i - feats_j), axis=1) / (2*sigma*sigma)) avg_cov = np.sum(rbfs)/feats_i.shape[0] covariance[i][j] = covariance[j][i] = avg_cov return covariance
[docs]def compute_euclidean_dist_quantiles(feature_list:List[np.array], subsamplefactor=1)->List[Tuple[float, float]]: """ Compute quantile distances between feature pairs feature_list: List of features each of shape: (num_samples, feature_dim) """ num_features = len(feature_list) num_samples = feature_list[0].shape[0] # (num_samples, feature_dim, num_features) features = np.stack(feature_list, -1) # compute all pairwise feature distances # too slow need to vectorize asap distances = [] for i in range(num_features): for j in range(num_features): if i == j: continue for k in range(0, num_samples, subsamplefactor): feat_i = features[k, :][:, i] feat_j = features[k, :][:, j] dist = np.sqrt(np.sum(np.square(feat_i-feat_j))) distances.append(dist) quantiles = [i*0.1 for i in range(1, 10)] quant_vals = np.quantile(distances, quantiles) quants = [] for quant, val in zip(quantiles, quant_vals.tolist()): quants.append((quant, val)) return quants
[docs]def greedy_op_selection(covariance:np.array, k:int)->List[int]: assert covariance.shape[0] == covariance.shape[1] assert len(covariance.shape) == 2 assert k <= covariance.shape[0] A = set() # to keep order information A_list = [] S = set() for i in range(covariance.shape[0]): S.add(i) for i in tqdm(range(k)): marginal_gains = [] marginal_gain_ids = [] for y in S - A: delta_y = compute_marginal_gain(y, A, S, covariance) marginal_gains.append(delta_y) marginal_gain_ids.append(y) val = -ma.inf argmax = -1 for marg_gain, marg_gain_id in zip(marginal_gains, marginal_gain_ids): if marg_gain > val: val = marg_gain argmax = marg_gain_id A.add(argmax) A_list.append(argmax) return A_list
[docs]def compute_marginal_gain(y:int, A:Set[int], S:Set[int], covariance:np.array)->float: if A: A_copy = deepcopy(A) A_copy.add(y) else: A_copy = set() A_copy.add(y) A_bar = S - A_copy sigma_y_sqr = covariance[y, y] if A: sigma_AA = covariance[np.ix_(list(A), list(A))] sigma_yA = covariance[np.ix_([y], list(A))] numerator = sigma_y_sqr - np.matmul(sigma_yA, np.matmul(np.linalg.inv(sigma_AA), sigma_yA.T)) else: numerator = sigma_y_sqr if A_bar: sigma_AA_bar = covariance[np.ix_(list(A_bar), list(A_bar))] sigma_yA_bar = covariance[np.ix_([y], list(A_bar))] denominator = sigma_y_sqr - np.matmul(sigma_yA_bar, np.matmul(np.linalg.inv(sigma_AA_bar), sigma_yA_bar.T)) else: denominator = sigma_y_sqr gain = numerator/denominator return float(gain)
[docs]def collect_features(rootfolder:str, subsampling_factor:int = 1)->Dict[str, List[np.array]]: """ Walks the rootfolder for h5py files and loads them into the format required for analysis. Inputs: rootfolder: full path to folder containing h5 files which have activations subsampling_factor: every nth minibatch will be loaded to keep memory manageable Outputs: dictionary with edge name strings as keys and values are lists of np.array [num_samples, feature_dim] """ assert subsampling_factor > 0 # gather all h5 files h5files = [os.path.join(rootfolder, f) for f in os.listdir(rootfolder) if os.path.isfile(os.path.join(rootfolder, f)) and '.h5' in f] assert h5files # storage for holding activations for all edges all_edges_activs = defaultdict(list) for h5file in h5files: with h5py.File(h5file, 'r') as hf: edge_name = h5file.split('/')[-1].split('.')[-2] edge_activ_list = [] # load all batches keys_list = list(hf.keys()) print(f'processing {h5file}, num batches {len(keys_list)}') for i in range(0, len(keys_list), subsampling_factor): key = keys_list[i] payload = np.array(hf.get(key)) edge_activ_list.append(payload) obsv_dict = defaultdict(list) # separate activations by ops for batch in edge_activ_list: # assumption (num_ops, batch_size, x, y, z) for op in range(batch.shape[0]): for b in range(batch.shape[1]): feat = batch[op][b] feat = feat.flatten() obsv_dict[op].append(feat) num_ops = edge_activ_list[0].shape[0] feature_list = [np.zeros(1) for _ in range(num_ops)] for key in obsv_dict.keys(): feat = np.array(obsv_dict[key]) feature_list[key] = feat # removing none and skip_connect del feature_list[-1] del feature_list[2] all_edges_activs[edge_name] = feature_list return all_edges_activs
[docs]def plot_all_covs(covs_kernel, corr, primitives, axs): assert axs.shape[0] * axs.shape[1] == len(covs_kernel) + 1 flat_axs = axs.flatten() for i, quantile in enumerate(covs_kernel.keys()): cov = covs_kernel[quantile] heatmap(cov, ax=flat_axs[i], xtick_labels=primitives, ytick_labels=primitives, fmt=".1g", cmap="coolwarm") flat_axs[i].set_title(f'Kernel covariance sigma={quantile} quantile') heatmap(corr, ax=flat_axs[-1], xtick_labels=primitives, ytick_labels=primitives, fmt=".1g", cmap="coolwarm") flat_axs[-1].set_title(f'Correlation')
[docs]def main(): rootfile = '/media/dedey/DATADRIVE1/activations' all_edges_activs = collect_features(rootfile, subsampling_factor=5) PRIMITIVES = [ 'max_pool_3x3', 'avg_pool_3x3', 'sep_conv_3x3', 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5', ] # # Use all edges # all_edges_list = [] # all_names_list = [] # for i in all_edges_activs.keys(): # all_edges_list.extend(all_edges_activs[i]) # for prim in PRIMITIVES: # all_names_list.append(i + '_' + prim) # Use specific edges all_edges_list = [] all_names_list = [] # edge_list = ['activations_node_0_edge_0'] edge_list = ['activations_node_0_edge_0', 'activations_node_0_edge_1'] # edge_list = ['activations_node_1_edge_0', 'activations_node_1_edge_1', 'activations_node_1_edge_2'] # edge_list = ['activations_node_2_edge_0', 'activations_node_2_edge_1', 'activations_node_2_edge_2', 'activations_node_2_edge_3'] # edge_list = ['activations_node_3_edge_0', 'activations_node_3_edge_1', 'activations_node_3_edge_2', 'activations_node_3_edge_3', 'activations_node_3_edge_4'] for name in edge_list: all_edges_list.extend(all_edges_activs[name]) for prim in PRIMITIVES: all_names_list.append(name + '_' + prim) # compute covariance like usual # cov = compute_covariance_offline(all_edges_list) # corr = compute_correlation(cov) # heatmap(corr, xtick_labels=all_names_list, ytick_labels=all_names_list, cmap='coolwarm') # plt.axis('equal') # plt.show() # compute kernel covariance # quants = compute_euclidean_dist_quantiles(all_edges_list, subsamplefactor=20) cov_kernel_orig = compute_rbf_kernel_covariance(all_edges_list, sigma=168) cov_kernel = cov_kernel_orig + 1.0*np.eye(cov_kernel_orig.shape[0]) print(f'Det before diag addition {np.linalg.det(cov_kernel_orig)}') print(f'Det after diag addition {np.linalg.det(cov_kernel)}') print(f'Condition number is {np.linalg.cond(cov_kernel)}') heatmap(cov_kernel, xtick_labels=all_names_list, ytick_labels=all_names_list, cmap="coolwarm") plt.axis('equal') plt.show() # brute force solution budget = 4 bf_sensors, bf_val = compute_brute_force_sol(cov_kernel_orig, budget) print(f'Brute force max subset {bf_sensors}, max mi {bf_val}') # greedy print('Greedy selection') greedy_ops = greedy_op_selection(cov_kernel, cov_kernel.shape[0]) for i, op_index in enumerate(greedy_ops): print(f'Greedy op {i} is {all_names_list[op_index]}') greedy_budget = greedy_ops[:budget] # find MI of the greedy solution V = set(range(cov_kernel.shape[0])) A_greedy = set(greedy_budget) V_minus_A_greedy = V - A_greedy I_greedy = _compute_mi(cov_kernel_orig, A_greedy, V_minus_A_greedy) print(f'Greedy solution is {greedy_budget}, mi is {I_greedy}') # seqopt # simulated batch size batch_size = 64 num_batches = int(all_edges_list[0].shape[0] / batch_size) # seqopt object that will get updated in an online manner num_items = cov_kernel.shape[0] eps = 0.1 seqopt = SeqOpt(num_items, eps) for i in tqdm(range(num_batches)): # simulate getting a new batch of activations sample = get_batch(all_edges_list, batch_size, i) # sample a list of activations from seqopt sel_list = seqopt.sample_sequence(with_replacement=False) # Using 50th percentile distance sigma = 168.0 cov = compute_rbf_kernel_covariance(sample, sigma=sigma) # update seqopt compute_marginal_gain_func = create_submod_f(cov) seqopt.update(sel_list, compute_marginal_gain_func) # now sample a list of ops and hope it is diverse sel_list = seqopt.sample_sequence(with_replacement=False) # sel_primitives = [all_names_list for i in sel_list] # print(f'SeqOpt selected primitives are {sel_primitives}') # check that it is close to greedy and or bruteforce budget = 4 sel_list = sel_list[:budget] # find MI of the greedy solution V = set(range(num_items)) A_seqopt = set(sel_list) V_minus_A_seqopt = V - A_seqopt I_seqopt = _compute_mi(cov_kernel_orig, A_seqopt, V_minus_A_seqopt) print(f'SeqOpt solution is {sel_list}, mi is {I_seqopt}')
# # For enumerating through many choices of rbf sigmas # covs_kernel = {} # for quantile, val in quants: # print(f'Computing kernel covariance for quantile {quantile}') # cov_kernel = compute_rbf_kernel_covariance(all_edges_list, sigma=val) # covs_kernel[quantile] = cov_kernel # # compute greedy sequence of ops on one of the kernels # print('Greedy selection') # greedy_ops = greedy_op_selection(covs_kernel[0.5], 3) # for i, op_index in enumerate(greedy_ops): # print(f'Greedy op {i} is {all_names_list[op_index]}') # fig, axs = plt.subplots(5, 2) # plot_all_covs(covs_kernel, corr, all_names_list, axs) # plt.show() if __name__ == '__main__': main()