# Source code for block_zoo.normalizations.LayerNorm

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT license.

import torch
import torch.nn as nn

from block_zoo.BaseLayer import BaseLayer,BaseConf
from utils.DocInherit import DocInherit
import copy

class LayerNormConf(BaseConf):
    """Configuration of the LayerNorm layer.

    Args:
        kwargs: keyword configuration options forwarded to :class:`BaseConf`.
    """

    def __init__(self, **kwargs):
        super(LayerNormConf, self).__init__(**kwargs)

    @DocInherit
    def declare(self):
        # LayerNorm consumes exactly one input of rank 3
        # (presumably [batch_size, seq_len, dim] — see LayerNorm.forward).
        self.num_of_inputs = 1
        self.input_ranks = [3]

    @DocInherit
    def inference(self):
        # Normalization does not change the shape: the output dims mirror
        # the (single) input's dims. Deep-copy so later mutation of
        # output_dim cannot alias input_dims.
        self.output_dim = copy.deepcopy(self.input_dims[0])
        super(LayerNormConf, self).inference()

    @DocInherit
    def verify(self):
        super(LayerNormConf, self).verify()
class LayerNorm(nn.Module):
    """Layer normalization over the last (feature) dimension of the input.

    Args:
        layer_conf (LayerNormConf): configuration of the layer; only
            ``input_dims[0][-1]`` (the feature dimension) is read here.
    """

    def __init__(self, layer_conf):
        super(LayerNorm, self).__init__()
        self.layer_conf = layer_conf
        # Hoist the feature-dimension lookup instead of repeating it.
        feature_dim = self.layer_conf.input_dims[0][-1]
        # Learnable gain and bias, initialized to the identity transform.
        # Names `g`/`b` are kept for state_dict compatibility.
        self.g = nn.Parameter(torch.ones(feature_dim))
        self.b = nn.Parameter(torch.zeros(feature_dim))
        # Small epsilon to avoid division by zero in the variance term.
        self.e = 1e-5

    def forward(self, string, string_len):
        """Normalize ``string`` along its last dimension.

        Args:
            string (Tensor): [batch_size, seq_len, dim] input.
            string_len (Tensor): [batch_size] lengths; passed through
                unchanged.

        Returns:
            Tensor: [batch_size, seq_len, dim] normalized output.
            Tensor: [batch_size] the unmodified ``string_len``.
        """
        u = string.mean(-1, keepdim=True)
        # Biased (population) variance over the feature dimension.
        s = (string - u).pow(2).mean(-1, keepdim=True)
        string = (string - u) / torch.sqrt(s + self.e)
        return self.g * string + self.b, string_len