# Source code for archai.onnx.export_utils

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

import types

import torch
from onnx import helper, load_model, numpy_helper, save
from onnxruntime.transformers import quantize_helper

from archai.onnx.onnx_forward import gpt2_onnx_forward


def prepare_model_for_onnx(model: torch.nn.Module, model_type: str) -> torch.nn.Module:
    """Prepare a PyTorch model for ONNX export.

    Modifies the model's forward function and performs any additional
    pre-processing steps required before the export.

    Args:
        model: Instance of the model to prepare for ONNX export.
        model_type: Type of model.

    Returns:
        The prepared PyTorch model, ready for ONNX export.

    """

    is_gpt2_family = model_type in ("gpt2", "gpt2-flex")

    if is_gpt2_family:
        # Swap in the ONNX-compatible forward and turn the Conv1D
        # modules inside each transformer block into Linear layers
        model.forward = types.MethodType(gpt2_onnx_forward, model)
        for block in model.transformer.h:
            quantize_helper.conv1d_to_linear(block.mlp)

    # Evaluation mode disables dropout for the export
    model.eval()

    return model
def weight_sharing(onnx_model_path: str, model_type: str) -> None:
    """Share weights between embedding and softmax layers in an ONNX model.

    The duplicated embedding initializer is removed from the graph and the
    embedding ``Gather`` node is rewired to read from the (transposed) softmax
    weight instead, so both layers share a single tensor. The model is saved
    back to ``onnx_model_path`` in place.

    Args:
        onnx_model_path: Path to the ONNX model that will have weights shared.
        model_type: Type of model to share the weights.

    Raises:
        ValueError: If `model_type` is not supported for weight sharing.

    """

    # Finds nodes in the graph based on their input name
    def _find_nodes_by_input(nodes, input_name):
        return [name for name in nodes.keys() if input_name in nodes[name].input]

    # Finds weights in the graph based on their shape
    def _find_weights_by_shape(weights, shape):
        return [name for name in weights.keys() if numpy_helper.to_array(weights[name]).shape == shape]

    # Loads the ONNX model
    model = load_model(onnx_model_path)

    # Gathers weights and nodes from the loaded model, keyed by name
    weights = {w.name: w for w in model.graph.initializer}
    nodes = {n.name: n for n in model.graph.node}

    # GPT-2 has a single embedding matrix and no adaptive-softmax cutoffs
    if model_type in ["gpt2", "gpt2-flex"]:
        n_emb_weight = 1
        n_cutoffs = 0
    else:
        raise ValueError(f"model_type: {model_type} not supported for weight sharing.")

    for i in range(n_emb_weight):
        # Grabs the embedding weights pointer and removes from the graph
        # (default name assumes an adaptive-embedding layout; GPT-2 overrides it)
        emb_weight_name = f"word_emb.emb_layers.{i}.weight"
        if model_type in ["gpt2", "gpt2-flex"]:
            emb_weight_name = "transformer.wte.weight"

        emb_weight = numpy_helper.to_array(weights[emb_weight_name])
        model.graph.initializer.remove(weights[emb_weight_name])

        # Replaces the duplicated embedding weights by the softmax ones,
        # located by matching the transposed shape (first layer may include cutoffs)
        softmax_shape = (emb_weight.shape[1], emb_weight.shape[0])
        if i == 0:
            softmax_shape = (emb_weight.shape[1], emb_weight.shape[0] + n_cutoffs)
        softmax_weight = _find_weights_by_shape(weights, softmax_shape)[0]

        # Rewires the embedding Gather to index the softmax weight along axis 1
        # (the softmax tensor is stored transposed relative to the embedding)
        emb_gather_name = _find_nodes_by_input(nodes, emb_weight_name)[0]
        nodes[emb_gather_name].attribute.append(helper.make_attribute("axis", 1))
        nodes[emb_gather_name].input[0] = softmax_weight

        # Adds a "Transpose" node to invert the new embedding weights
        # NOTE(review): permutation choice presumably depends on whether cutoffs
        # add an extra dimension to the gathered output — confirm against exporter
        permute_dim = [1, 2, 0]
        if n_cutoffs != 0:
            permute_dim = [1, 0, 2]

        emb_gather_output = nodes[emb_gather_name].output[0]
        transpose_node_output = f"transposed_out_{i}"
        transpose_node = helper.make_node("Transpose", [emb_gather_output], [transpose_node_output], perm=permute_dim)
        model.graph.node.append(transpose_node)

        # Links the previous embedding output with the "Transpose" node,
        # i.e. the original consumer of the Gather output now reads the transposed tensor
        emb_gather = _find_nodes_by_input(nodes, emb_gather_output)[0]
        nodes[emb_gather].input[0] = transpose_node_output

    # Saves the ONNX model back to the same path
    save(model, onnx_model_path)