# Source code for archai.trainers.coin_betting_optimizer
# Copyright (c) @IssamLaradji.
# https://github.com/IssamLaradji/sls/blob/master/src/optimizers/others/cocob.py
import math
from typing import Any, Callable, Dict, Iterable, Optional, Union
import torch
from torch import optim
class CocobBackprop(optim.Optimizer):
    """Coin Betting optimizer with Backpropagation.

    It has been proposed in `Training Deep Networks without Learning Rates
    Through Coin Betting`.

    Reference:
        https://arxiv.org/pdf/1705.07795.pdf

    """

    def __init__(
        self, params: Union[Iterable, Dict[str, Any]], alpha: float = 100.0, eps: float = 1e-8
    ) -> None:
        """Initialize the optimizer.

        Args:
            params: Iterable of parameters to optimize or dicts defining
                parameter groups.
            alpha: Positive number to adjust betting fraction. Theoretical convergence
                guarantee does not depend on choice of `alpha`.
            eps: Positive initial wealth for betting algorithm. Theoretical convergence
                guarantee does not depend on choice of `eps`.

        """

        self.alpha = alpha
        self.eps = eps

        defaults = dict(alpha=alpha, eps=eps)

        super().__init__(params, defaults)

    def step(self, closure: Optional[Callable] = None) -> Optional[torch.Tensor]:
        """Perform a single optimization step.

        Args:
            closure: Closure that re-evaluates the model and returns the loss.

        Returns:
            Loss evaluated by `closure`, or `None` when no closure is supplied.

        """

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue

                grad = param.grad.data
                state = self.state[param]
                param_shape = param.shape

                # Better bets on the negative gradient direction
                neg_grad = -grad

                if len(state) == 0:
                    # Happens only once, at the beginning of optimization:
                    # set initial parameter weights and zero reward.
                    # NOTE(review): `param.data` is stored by reference, not cloned.
                    # This is safe because `step` rebinds `param.data` rather than
                    # mutating it in place, but any in-place parameter update
                    # elsewhere would corrupt this snapshot.
                    state["initial_weight"] = param.data
                    state["reward"] = param.new_zeros(param_shape)

                    # Don't bet anything for the first round
                    state["bet"] = param.new_zeros(param_shape)

                    # Initialize internal states useful for computing betting fraction
                    state["neg_grads_sum"] = param.new_zeros(param_shape)
                    state["grads_abs_sum"] = param.new_zeros(param_shape)
                    state["max_observed_scale"] = self.eps * param.new_ones(param_shape)

                # Load states into variables
                initial_weight = state["initial_weight"]
                reward = state["reward"]
                bet = state["bet"]
                neg_grads_sum = state["neg_grads_sum"]
                grads_abs_sum = state["grads_abs_sum"]
                max_observed_scale = state["max_observed_scale"]

                # Update internal states useful for computing betting fraction
                max_observed_scale = torch.max(max_observed_scale, torch.abs(grad))
                grads_abs_sum += torch.abs(grad)
                neg_grads_sum += neg_grad

                # Based on how much the Better bets on the -ve gradient prediction,
                # check how much the Better won (-ve if lost)
                win_amount = bet * neg_grad

                # Update Better's reward. Negative reward is not allowed.
                reward = torch.max(reward + win_amount, torch.zeros_like(reward))

                # Better decides the bet fraction based on observations so far
                bet_fraction = neg_grads_sum / (
                    max_observed_scale
                    * (torch.max(grads_abs_sum + max_observed_scale, self.alpha * max_observed_scale))
                )

                # Better makes the bet according to the decided betting fraction
                bet = bet_fraction * (max_observed_scale + reward)

                # Set parameter weights
                param.data = initial_weight + bet

                # Save state back in memory
                state["neg_grads_sum"] = neg_grads_sum
                state["grads_abs_sum"] = grads_abs_sum
                state["max_observed_scale"] = max_observed_scale
                state["reward"] = reward
                state["bet"] = bet

                # For Cocob-Backprop `bet_fraction` need not be maintained in state.
                # Only kept for visualization.
                state["bet_fraction"] = bet_fraction

        return loss
class CocobOns(optim.Optimizer):
    """Coin Betting optimizer with Online Learning.

    It has been proposed in `Black-Box Reductions for Parameter-free
    Online Learning in Banach Spaces`.

    Reference:
        https://arxiv.org/pdf/1802.06293.pdf

    """

    def __init__(self, params: Union[Iterable, Dict[str, Any]], eps: float = 1e-8) -> None:
        """Initialize the optimizer.

        Args:
            params: Iterable of parameters to optimize or dicts defining
                parameter groups.
            eps: Positive initial wealth for betting algorithm. Theoretical convergence
                guarantee does not depend on choice of `eps`.

        """

        self.eps = eps

        defaults = dict(eps=eps)

        super().__init__(params, defaults)

    def step(self, closure: Optional[Callable] = None) -> Optional[torch.Tensor]:
        """Perform a single optimization step.

        Args:
            closure: Closure that re-evaluates the model and returns the loss.

        Returns:
            Loss evaluated by `closure`, or `None` when no closure is supplied.

        """

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue

                grad = param.grad.data
                state = self.state[param]
                param_shape = param.data.shape

                # Clip gradients to be in (-1, 1).
                # NOTE(review): this mutates `param.grad` in place, so the clipped
                # gradient is also visible to any code that reads it after `step`.
                grad.clamp_(-1.0, 1.0)

                # Better bets on the negative gradient direction
                neg_grad = -grad

                if len(state) == 0:
                    # Happens only once, at the beginning of optimization:
                    # set initial parameter weights and initial wealth `eps`.
                    state["initial_weight"] = param.data
                    state["wealth"] = self.eps * param.new_ones(param_shape)

                    # Don't bet anything for the first round
                    state["bet_fraction"] = param.new_zeros(param_shape)
                    state["bet"] = param.new_zeros(param_shape)

                    # Initialize internal states useful for computing betting fraction
                    state["z_square_sum"] = param.new_zeros(param_shape)

                # Load states into variables
                wealth = state["wealth"]
                bet_fraction = state["bet_fraction"]
                z_square_sum = state["z_square_sum"]
                initial_weight = state["initial_weight"]
                bet = state["bet"]

                # Based on how much the Better bets on the -ve gradient prediction,
                # check how much the Better won (-ve if lost)
                win_amount = bet * neg_grad

                # Update Better's wealth based on what was won / lost
                wealth = wealth + win_amount

                # Better decides the bet fraction based on observations so far
                # (`z`, `A` variable notations from Algo 1 in the paper)
                z = grad / (1 - (bet_fraction * grad))
                z_square_sum = z_square_sum + (z * z)
                A = 1 + z_square_sum
                bet_fraction = bet_fraction - (2 / (2 - math.log(3))) * (z / A)
                # Betting fraction is restricted to [-1/2, 1/2]
                bet_fraction.clamp_(-0.5, 0.5)

                # Better makes the bet according to the decided betting fraction
                bet = bet_fraction * wealth

                # Set parameter weights
                param.data = initial_weight + bet

                # Save state back in memory
                state["bet_fraction"] = bet_fraction
                state["wealth"] = wealth
                state["z_square_sum"] = z_square_sum
                state["bet"] = bet

        return loss