Configuration Utilities#

ONNX Configuration (Base)#

class archai.onnx.config_utils.onnx_config_base.OnnxConfig(config: PretrainedConfig, task: str | None = 'causal-lm')[source]#

Base ONNX configuration.

This class defines the base ONNX configuration for a given task: the input and output structures required by exported ONNX models, plus additional properties and methods for handling ONNX Runtime graph optimization.

DEFAULT_TASK_OUTPUTS = {'causal-lm': {'probs': {0: 'batch_size'}}}#

property is_ort_graph_optimizable: bool#

Return whether the configuration supports additional graph optimization.

property ort_graph_optimizer_args: Tuple[Any, ...]#

Return additional arguments used by the ORT graph optimizer.

get_inputs() → Mapping[str, Mapping[int, str]][source]#

Get the ONNX-based inputs structure.

Returns:

ONNX-based inputs.

get_outputs() → Mapping[str, Mapping[int, str]][source]#

Get the ONNX-based outputs structure.

Returns:

ONNX-based outputs.
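
To make the mapping format concrete, the following sketch shows what the returned structures could look like for the causal-lm task. The 'probs' entry mirrors DEFAULT_TASK_OUTPUTS above; the 'input_ids' entry and its axis names are an assumption for illustration and may differ from the actual implementation.

from collections import OrderedDict

# Hypothetical {tensor_name: {axis_index: axis_name}} structures, in the format
# returned by get_inputs() and get_outputs() for the causal-lm task.
inputs = OrderedDict([("input_ids", {0: "batch_size", 1: "seq_len"})])
outputs = OrderedDict([("probs", {0: "batch_size"})])

# These dictionaries double as the dynamic_axes argument of torch.onnx.export,
# marking which tensor dimensions stay symbolic in the exported graph.
dynamic_axes = {**inputs, **outputs}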

generate_dummy_inputs(batch_size: int | None = 2, seq_len: int | None = 8) → Mapping[str, Tensor][source]#

Generate dummy inputs for the ONNX exporter.

Parameters:
  • batch_size – Batch size.

  • seq_len – Sequence length.

Returns:

Keyword arguments for the model’s forward() function.
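
A minimal usage sketch, assuming a Hugging Face GPT2Config as the wrapped PretrainedConfig; the exact keys of the returned mapping depend on the task and subclass.

from transformers import GPT2Config
from archai.onnx.config_utils.onnx_config_base import OnnxConfig

config = GPT2Config()  # any PretrainedConfig works; GPT-2 is used only for illustration
onnx_config = OnnxConfig(config, task="causal-lm")

# Dummy tensors sized with the documented defaults (batch_size=2, seq_len=8),
# ready to be passed as keyword arguments to the model's forward().
dummy_inputs = onnx_config.generate_dummy_inputs(batch_size=2, seq_len=8)
for name, tensor in dummy_inputs.items():
    print(name, tuple(tensor.shape))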

class archai.onnx.config_utils.onnx_config_base.OnnxConfigWithPast(config: PretrainedConfig, task: str | None = 'causal-lm', use_past: bool | None = False, past_key_values: int | None = 2)[source]#

ONNX configuration with support for past key/values.

This class is a subclass of OnnxConfig that adds the ability to use past key/values (also known as ‘use_cache’) in the model’s ONNX export.

property hidden_size: int#

Return the dimensionality of hidden units.

property num_layers: int#

Return the number of layers.

property num_attention_heads: int#

Return the number of attention heads.

get_inputs() → Mapping[str, Mapping[int, str]][source]#

Get the ONNX-based inputs structure.

Returns:

ONNX-based inputs.

get_outputs() → Mapping[str, Mapping[int, str]][source]#

Get the ONNX-based outputs structure.

Returns:

ONNX-based outputs.

generate_dummy_inputs(batch_size: int | None = 2, seq_len: int | None = 8, past_seq_len: int | None = 8) → Mapping[str, Tensor][source]#

Generate dummy inputs for the ONNX exporter.

Parameters:
  • batch_size – Batch size.

  • seq_len – Sequence length.

  • past_seq_len – Past key/values sequence length.

Returns:

Keyword arguments for the model’s forward() function.
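
The sketch below shows dummy-input generation with past key/values enabled. It instantiates the GPT-2 subclass documented further down rather than OnnxConfigWithPast directly, since the num_layers, num_attention_heads, and hidden_size lookups depend on attributes of the wrapped configuration; the names of the past tensors are implementation-defined.

from transformers import GPT2Config
from archai.onnx.config_utils.gpt2_onnx_config import GPT2OnnxConfig

config = GPT2Config(n_layer=2)  # small configuration, only for illustration
onnx_config = GPT2OnnxConfig(config, task="causal-lm", use_past=True)

# With use_past=True, the dummy inputs typically include past key/value tensors
# for every layer in addition to the regular inputs.
dummy_inputs = onnx_config.generate_dummy_inputs(batch_size=2, seq_len=8, past_seq_len=8)
for name, tensor in dummy_inputs.items():
    print(name, tuple(tensor.shape))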

CodeGen ONNX Configuration#

class archai.onnx.config_utils.codegen_onnx_config.CodeGenOnnxConfig(config: PretrainedConfig, task: str | None = 'causal-lm', use_past: bool | None = False)[source]#

CodeGen ONNX configuration (with past key/values support).

property num_layers: int#

Return the number of layers.

property is_ort_graph_optimizable: bool#

Return whether the configuration supports additional graph optimization.

property ort_graph_optimizer_args: Tuple[Any, ...]#

Return additional arguments used by the ORT graph optimizer.
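
As an illustrative sketch (not taken from the library's documentation), the CodeGen configuration is wrapped the same way; the Hugging Face CodeGenConfig used here is only a stand-in for the configuration of an actual model.

from transformers import CodeGenConfig
from archai.onnx.config_utils.codegen_onnx_config import CodeGenOnnxConfig

config = CodeGenConfig(n_layer=2)  # stand-in config; use the model's real config in practice
onnx_config = CodeGenOnnxConfig(config, task="causal-lm", use_past=True)

print(onnx_config.num_layers)                # CodeGen-specific layer count
print(onnx_config.is_ort_graph_optimizable)  # whether extra ORT graph optimization applies
print(onnx_config.ort_graph_optimizer_args)  # extra arguments forwarded to the ORT graph optimizer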

GPT-2 ONNX Configuration#

class archai.onnx.config_utils.gpt2_onnx_config.GPT2OnnxConfig(config: PretrainedConfig, task: str | None = 'causal-lm', use_past: bool | None = False)[source]#

GPT-2 ONNX configuration (with past key/values support).

property num_layers: int#

Return the number of layers.

property is_ort_graph_optimizable: bool#

Return whether the configuration supports additional graph optimization.

property ort_graph_optimizer_args: Tuple[Any, ...]#

Return additional arguments used by the ORT graph optimizer.
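
Putting the pieces together, the sketch below wires a GPT2OnnxConfig into a plain torch.onnx.export call. This is only an illustration under stated assumptions, not Archai's own export pipeline: the raw model emits logits while the configuration names the output 'probs', so the real pipeline may wrap or post-process the model, and 'gpt2.onnx' is just an example output path.

import torch
from transformers import GPT2Config, GPT2LMHeadModel
from archai.onnx.config_utils.gpt2_onnx_config import GPT2OnnxConfig

config = GPT2Config(n_layer=2, use_cache=False, return_dict=False)  # tiny model, tuple outputs
model = GPT2LMHeadModel(config).eval()
onnx_config = GPT2OnnxConfig(config, task="causal-lm")

inputs = onnx_config.get_inputs()
outputs = onnx_config.get_outputs()
dummy_inputs = onnx_config.generate_dummy_inputs(batch_size=2, seq_len=8)

torch.onnx.export(
    model,
    (dummy_inputs,),                      # dict of keyword arguments to forward()
    "gpt2.onnx",
    input_names=list(inputs.keys()),
    output_names=list(outputs.keys()),
    dynamic_axes={**inputs, **outputs},   # keep batch and sequence dimensions symbolic
    opset_version=14,
)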

class archai.onnx.config_utils.gpt2_onnx_config.GPT2FlexOnnxConfig(config: PretrainedConfig, task: str | None = 'causal-lm', use_past: bool | None = False)[source]#

GPT-2 Flex ONNX configuration (with past key/values support).

property num_layers: int#

Return the number of layers.

property is_ort_graph_optimizable: bool#

Return whether the configuration supports additional graph optimization.

property ort_graph_optimizer_args: Tuple[Any, ...]#

Return additional arguments used by the ORT graph optimizer.

generate_dummy_inputs(batch_size: int = 2, seq_len: int = 8, past_seq_len: int = 8) → Mapping[str, Tensor][source]#

Generate dummy inputs for the ONNX exporter.

Parameters:
  • batch_size – Batch size.

  • seq_len – Sequence length.

  • past_seq_len – Past key/values sequence length.

Returns:

Keyword arguments for the model’s forward() function.