# Source code for archai.supergraph.algos.divnas.seqopt

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

from typing import List

import numpy as np

from archai.supergraph.algos.divnas.wmr import Wmr


class SeqOpt:
    """ Implements SeqOpt

    Keeps one ``Wmr`` expert per slot of the output sequence. A sequence is
    produced by asking each slot's expert for an item (``sample``), and the
    experts are trained from per-slot reward vectors (``update``).

    TODO: Later on we might want to refactor this class
    to be able to handle bandit feedback
    """

    # Maximum number of re-draws allowed for a single slot when sampling
    # without replacement, so a concentrated expert cannot stall sampling.
    _MAX_RESAMPLES = 10000

    def __init__(self, num_items: int, eps: float):
        """Create one expert per slot.

        Args:
            num_items: Number of items; also the length of every sequence.
            eps: Passed unchanged to every ``Wmr`` instance.
        """
        self._num_items = num_items

        # initialize wmr copies (one independent expert per slot)
        self._expert_algos = [Wmr(self._num_items, eps) for _ in range(self._num_items)]

    def sample_sequence(self, with_replacement: bool = False) -> List[int]:
        """Draw one item per slot from that slot's expert.

        Args:
            with_replacement: If True, the raw draw of every expert is used
                and items may repeat. If False, a slot whose draw collides
                with an earlier slot is re-drawn, up to ``_MAX_RESAMPLES``
                times for that slot.

        Returns:
            The selected item ids in slot order. When ``with_replacement`` is
            False and a slot exhausts its re-draw budget, the duplicate is
            kept (best effort) and a message is printed.
        """
        chosen = set()
        # to keep order information
        sequence = []

        for expert in self._expert_algos:
            item_id = expert.sample()
            if not with_replacement:
                # Rejection sampling with a per-slot budget: each slot gets
                # its own retries, so one hard slot does not starve the rest.
                retries = 0
                while item_id in chosen and retries < self._MAX_RESAMPLES:
                    item_id = expert.sample()
                    retries += 1

                # Only report when this slot really ended on a duplicate.
                if item_id in chosen:
                    print('Got caught in infinite loop for a while')

            chosen.add(item_id)
            sequence.append(item_id)

        return sequence

    def _check_marg_gains(self, reward_storage: List[List[float]]) -> bool:
        """Return True if every item's reward is non-increasing across slots.

        Args:
            reward_storage: One reward vector per slot (rows are slots,
                columns are items).

        Returns:
            True when, for every column, each row is <= the previous row.
            Empty input is trivially True.
        """
        reward_array = np.asarray(reward_storage)
        if reward_array.size == 0:
            return True

        # Differences between consecutive slots, for all items at once.
        slot_deltas = np.diff(reward_array, axis=0)
        return bool(np.all(slot_deltas <= 0))

    def _scale_minus_one_to_one(self, rewards: np.ndarray) -> np.ndarray:
        """Linearly rescale ``rewards`` so its min maps to -1 and its max to 1.

        Args:
            rewards: Array of raw rewards.

        Returns:
            Float array of the same shape with values in [-1, 1]. If all
            rewards are equal there is no range to scale, so zeros are
            returned.
        """
        rewards = np.asarray(rewards, dtype=float)
        if rewards.size == 0:
            return rewards

        low, high = rewards.min(), rewards.max()
        if low == high:
            # np.interp requires increasing sample points; a zero-width range
            # would violate that, so handle it explicitly.
            return np.zeros_like(rewards)

        return np.interp(rewards, (low, high), (-1, 1))

    def update(self, sel_list: List[int], compute_marginal_gain_func) -> None:
        """ In the full information case we will update
        all expert copies according to the marginal benefits

        Args:
            sel_list: Previously selected sequence of item ids, in slot order.
            compute_marginal_gain_func: Callable invoked as
                ``f(item, sub_sel, S)`` where ``sub_sel`` is the set of items
                in ``sel_list`` before the current slot and ``S`` is the set
                of all item ids. It must already be bound to the covariance
                function needed.
        """
        # mother set
        S = set(range(self._num_items))

        # for each slot
        for slot_id in range(self._num_items):
            # items already placed in the slots before this one
            sub_sel = set(sel_list[:slot_id])

            # reward of every candidate action for this slot
            reward_vector = [
                compute_marginal_gain_func(item, sub_sel, S)
                for item in range(self._num_items)
            ]

            # update the expert algo copy for this slot
            scaled_rewards = self._scale_minus_one_to_one(np.array(reward_vector))
            self._expert_algos[slot_id].update(scaled_rewards)

        # Debugging aid: collect each slot's reward_vector into a list and
        # pass it to self._check_marg_gains(...) to verify that the marginal
        # gains are diminishing across slots.