Source code for archai.supergraph.algos.divnas.seqopt

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import List

import numpy as np

from archai.supergraph.algos.divnas.wmr import Wmr


class SeqOpt:
    """Implements SeqOpt.

    TODO: Later on we might want to refactor this class
    to be able to handle bandit feedback.
    """

    def __init__(self, num_items: int, eps: float):
        self._num_items = num_items

        # initialize one Wmr expert copy per slot in the sequence
        self._expert_algos = [Wmr(self._num_items, eps) for _ in range(self._num_items)]
    def sample_sequence(self, with_replacement=False) -> List[int]:
        sel_set = set()
        # list to keep the order of selection
        sel_list = []

        counter = 0
        counter_limit = 10000

        for i in range(self._num_items):
            item_id = self._expert_algos[i].sample()
            if not with_replacement:
                # resample until an unseen item is drawn; the counter bounds
                # what would otherwise be a potentially infinite loop
                while item_id in sel_set and counter < counter_limit:
                    item_id = self._expert_algos[i].sample()
                    counter += 1
                if counter >= counter_limit:
                    print('Hit the resampling limit; the sequence may contain duplicates')
            sel_set.add(item_id)
            sel_list.append(item_id)

        return sel_list
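    # Example (hypothetical draw): with num_items=3 and with_replacement=False,
    # one call might return [2, 0, 1] -- slot 0's expert drew item 2, slot 1's
    # expert drew item 0, and slot 2's expert resampled until it produced the
    # remaining item 1.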
    def _check_marg_gains(self, reward_storage: List[List[float]]) -> bool:
        """Returns True only if the marginal gain of every item is
        non-increasing from one slot to the next."""
        reward_array = np.array(reward_storage)

        for i in range(reward_array.shape[1]):
            marg_gains_this_item = reward_array[:, i]
            if not np.all(np.diff(marg_gains_this_item) <= 0):
                return False

        return True

    def _scale_minus_one_to_one(self, rewards: np.ndarray) -> np.ndarray:
        # linearly map rewards into [-1, 1] before the expert update
        scaled = np.interp(rewards, (rewards.min(), rewards.max()), (-1, 1))
        return scaled
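    # Worked example of the scaling above (hypothetical values): rewards
    # [0.0, 5.0, 10.0] map linearly to [-1.0, 0.0, 1.0].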
    def update(self, sel_list: List[int], compute_marginal_gain_func) -> None:
        """In the full-information case we update all expert copies
        according to the marginal benefits."""
        # the ground set of all items
        S = set(range(self._num_items))

        reward_storage = []

        # for each slot in the sequence
        for slot_id in range(self._num_items):
            # items selected in the preceding slots
            sub_sel = set(sel_list[:slot_id])

            # compute the marginal gain of every item given the items
            # already selected; the function passed in must already be
            # bound to the covariance function needed
            reward_vector = []
            for item in range(self._num_items):
                reward = compute_marginal_gain_func(item, sub_sel, S)
                reward_vector.append(reward)

            # update the expert algo copy for this slot
            scaled_rewards = self._scale_minus_one_to_one(np.array(reward_vector))
            self._expert_algos[slot_id].update(scaled_rewards)

            reward_storage.append(reward_vector)
        # # Uncomment to aid in debugging:
        # np.set_printoptions(precision=3, suppress=True)
        # print('Marginal gain array (item_id X slots)')
        # print(np.array(reward_storage).T)
        # is_descending = self._check_marg_gains(reward_storage)
        # if not is_descending:
        #     print('WARNING: marginal gains are not diminishing')
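
# A minimal usage sketch (an assumption-laden illustration, not part of the
# original module): `toy_marginal_gain` below is a hypothetical stand-in for
# the covariance-bound gain function that the caller passes to update(), and
# Wmr is assumed to expose the sample() and update() methods used above.
if __name__ == '__main__':
    num_items, eps = 4, 0.1
    seqopt = SeqOpt(num_items, eps)

    base_values = [0.9, 0.5, 0.3, 0.1]  # hypothetical per-item base values

    def toy_marginal_gain(item: int, sub_sel: set, S: set) -> float:
        # zero gain for an already-selected item; otherwise the item's base
        # value, discounted as the selection grows (diminishing returns)
        if item in sub_sel:
            return 0.0
        return base_values[item] / (1.0 + len(sub_sel))

    # alternate sampling a sequence and updating the per-slot experts
    for _ in range(10):
        sel_list = seqopt.sample_sequence(with_replacement=False)
        seqopt.update(sel_list, toy_marginal_gain)

    print('sampled sequence:', seqopt.sample_sequence())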