Source code for archai.trainers.coin_betting_optimizer

# Copyright (c) @IssamLaradji.
# https://github.com/IssamLaradji/sls/blob/master/src/optimizers/others/cocob.py

import math
from typing import Any, Callable, Dict, Iterable, Optional, Union

import torch
from torch import optim


class CocobBackprop(optim.Optimizer):
    """Coin Betting optimizer with Backpropagation.

    It has been proposed in `Training Deep Networks without Learning Rates Through Coin Betting`.

    Reference:
        https://arxiv.org/pdf/1705.07795.pdf

    """

    def __init__(
        self, params: Union[Iterable, Dict[str, Any]], alpha: Optional[float] = 100.0, eps: Optional[float] = 1e-8
    ) -> None:
        """Initialize the optimizer.

        Args:
            params: Iterable of parameters to optimize or dicts defining parameter groups.
            alpha: Positive number to adjust the betting fraction. The theoretical convergence
                guarantee does not depend on the choice of `alpha`.
            eps: Positive initial wealth for the betting algorithm. The theoretical convergence
                guarantee does not depend on the choice of `eps`.

        """

        self.alpha = alpha
        self.eps = eps

        defaults = dict(alpha=alpha, eps=eps)
        super(CocobBackprop, self).__init__(params, defaults)
    def step(self, closure: Optional[Callable] = None) -> torch.FloatTensor:
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue

                grad = param.grad.data
                state = self.state[param]
                param_shape = param.shape

                # The bettor bets on the negative gradient
                neg_grad = -grad

                if len(state) == 0:
                    # Happens only once, at the beginning of the optimization
                    # Set initial parameter weights and zero reward
                    state["initial_weight"] = param.data
                    state["reward"] = param.new_zeros(param_shape)

                    # Do not bet anything in the first round
                    state["bet"] = param.new_zeros(param_shape)

                    # Initialize internal states used for computing the betting fraction
                    state["neg_grads_sum"] = param.new_zeros(param_shape)
                    state["grads_abs_sum"] = param.new_zeros(param_shape)
                    state["max_observed_scale"] = self.eps * param.new_ones(param_shape)

                # Load states into variables
                initial_weight = state["initial_weight"]
                reward = state["reward"]
                bet = state["bet"]
                neg_grads_sum = state["neg_grads_sum"]
                grads_abs_sum = state["grads_abs_sum"]
                max_observed_scale = state["max_observed_scale"]

                # Update internal states used for computing the betting fraction
                max_observed_scale = torch.max(max_observed_scale, torch.abs(grad))
                grads_abs_sum += torch.abs(grad)
                neg_grads_sum += neg_grad

                # Based on how much the bettor bet on the negative gradient prediction,
                # check how much the bettor won (negative if lost)
                win_amount = bet * neg_grad

                # Update the bettor's reward. Negative reward is not allowed.
                reward = torch.max(reward + win_amount, torch.zeros_like(reward))

                # The bettor decides the betting fraction based on the observations so far
                bet_fraction = neg_grads_sum / (
                    max_observed_scale
                    * (torch.max(grads_abs_sum + max_observed_scale, self.alpha * max_observed_scale))
                )

                # The bettor makes the bet according to the decided betting fraction
                bet = bet_fraction * (max_observed_scale + reward)

                # Set parameter weights
                param.data = initial_weight + bet

                # Save state back in memory
                state["neg_grads_sum"] = neg_grads_sum
                state["grads_abs_sum"] = grads_abs_sum
                state["max_observed_scale"] = max_observed_scale
                state["reward"] = reward
                state["bet"] = bet

                # For Cocob-Backprop, `bet_fraction` need not be maintained in the state.
                # It is only kept for visualization.
                state["bet_fraction"] = bet_fraction

        return loss
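# A minimal usage sketch, not part of the original module: CocobBackprop follows the standard
# torch.optim.Optimizer interface, so it drops into an ordinary training loop with no learning
# rate to tune. The model, data, and loss below are hypothetical placeholders.
#
#   import torch.nn as nn
#
#   model = nn.Linear(10, 1)
#   criterion = nn.MSELoss()
#   optimizer = CocobBackprop(model.parameters(), alpha=100.0, eps=1e-8)
#
#   x, y = torch.randn(32, 10), torch.randn(32, 1)
#   for _ in range(100):
#       optimizer.zero_grad()
#       loss = criterion(model(x), y)
#       loss.backward()
#       optimizer.step()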
class CocobOns(optim.Optimizer):
    """Coin Betting optimizer with Online Learning.

    It has been proposed in `Black-Box Reductions for Parameter-free Online Learning in Banach Spaces`.

    Reference:
        https://arxiv.org/abs/1802.06293

    """

    def __init__(self, params: Union[Iterable, Dict[str, Any]], eps: Optional[float] = 1e-8) -> None:
        """Initialize the optimizer.

        Args:
            params: Iterable of parameters to optimize or dicts defining parameter groups.
            eps: Positive initial wealth for the betting algorithm. The theoretical convergence
                guarantee does not depend on the choice of `eps`.

        """

        self.eps = eps

        defaults = dict(eps=eps)
        super(CocobOns, self).__init__(params, defaults)
    def step(self, closure: Optional[Callable] = None) -> torch.FloatTensor:
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for param in group["params"]:
                if param.grad is None:
                    continue

                grad = param.grad.data
                state = self.state[param]
                param_shape = param.data.shape

                # Clip gradients to be in [-1, 1]
                grad.clamp_(-1.0, 1.0)

                # The bettor bets on the negative gradient
                neg_grad = -grad

                if len(state) == 0:
                    # Happens only once, at the beginning of the optimization
                    # Set initial parameter weights and initial wealth
                    state["initial_weight"] = param.data
                    state["wealth"] = self.eps * param.new_ones(param_shape)

                    # Do not bet anything in the first round
                    state["bet_fraction"] = param.new_zeros(param_shape)
                    state["bet"] = param.new_zeros(param_shape)

                    # Initialize internal states used for computing the betting fraction
                    state["z_square_sum"] = param.new_zeros(param_shape)

                # Load states into variables
                wealth = state["wealth"]
                bet_fraction = state["bet_fraction"]
                z_square_sum = state["z_square_sum"]
                initial_weight = state["initial_weight"]
                bet = state["bet"]

                # Based on how much the bettor bet on the negative gradient prediction,
                # check how much the bettor won (negative if lost)
                win_amount = bet * neg_grad

                # Update the bettor's wealth based on what was won / lost
                wealth = wealth + win_amount

                # The bettor decides the betting fraction based on the observations so far
                # (`z` and `A` follow the variable notation of Algorithm 1 in the paper)
                z = grad / (1 - (bet_fraction * grad))
                z_square_sum = z_square_sum + (z * z)
                A = 1 + z_square_sum
                bet_fraction = bet_fraction - (2 / (2 - math.log(3))) * (z / A)
                bet_fraction.clamp_(-0.5, 0.5)

                # The bettor makes the bet according to the decided betting fraction
                bet = bet_fraction * wealth

                # Set parameter weights
                param.data = initial_weight + bet

                # Save state back in memory
                state["bet_fraction"] = bet_fraction
                state["wealth"] = wealth
                state["z_square_sum"] = z_square_sum
                state["bet"] = bet

        return loss
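# A minimal usage sketch, not part of the original module: CocobOns also follows the
# torch.optim.Optimizer interface, and `step` optionally accepts a closure that re-evaluates
# the loss, as with other PyTorch optimizers. The model, data, and loss below are hypothetical
# placeholders.
#
#   import torch.nn as nn
#
#   model = nn.Linear(10, 1)
#   criterion = nn.MSELoss()
#   optimizer = CocobOns(model.parameters(), eps=1e-8)
#
#   x, y = torch.randn(32, 10), torch.randn(32, 1)
#   for _ in range(100):
#       def closure():
#           optimizer.zero_grad()
#           loss = criterion(model(x), y)
#           loss.backward()
#           return loss
#       optimizer.step(closure)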