- calibrate_method
- calibration_batch_size
- calibration_data_config
- calibration_nsamples
- calibration_providers
- calibration_sampling_size
- checkpoint_path
- command line option
  - a_bits
  - a_per_token
  - a_symmetric
  - accuracy_level
  - activation_symmetric
  - activation_type
  - ActivationSymmetric
  - add_zero_point
  - AddQDQPairToWeight
  - algorithm
  - all_tensors_to_one_file
  - allow_tf32
  - alpha
  - alpha_pattern
  - approach
  - atol
  - backend
  - batch_size
  - binary_file
  - bits
  - block_size
  - block_to_split
  - blocksize
  - calibrate_method
  - calibration_batch_size
  - calibration_data_config
  - calibration_nsamples
  - calibration_providers
  - calibration_sampling_size
  - checkpoint_path
  - components
  - compress_to_fp16
  - compute_dtype
  - context_length
  - convert_attribute
  - cost_model
  - cpu_cores
  - damp_percent
  - data_config
  - decompose_both
  - decompose_factor
  - desc_act
  - device
  - device_map
  - dim_param
  - dim_value
  - disable_cpu_fallback
  - do_validate
  - domain
  - double_quant
  - duo_scaling
  - dynamic
  - dynamic_lora_r
  - element_wise_binary_ops
  - embed_context
  - enable_cuda_graph
  - enable_dpu
  - enable_htp
  - enable_profiling
  - ephemeral_gpu_offload
  - eval_data_config
  - example_input_func
  - exclude_embeds
  - exclude_lm_head
  - exclude_modules
  - excluded_precisions
  - execution_mode_list
  - export_compatible
  - external_data_name
  - extra_args
  - extra_configs
  - extra_options
  - extra_session_config
  - final_orientation
  - float16
  - force_evaluate_other_eps
  - force_fp16_inputs
  - force_fp32_nodes
  - force_fp32_ops
  - fp16
  - fuse_layernorm
  - gpus
  - group_size
  - hidden_size
  - htp_socs
  - ignored_scope
  - ignored_scope_type
  - init_weights
  - input
  - input_dim
  - input_int32
  - input_layouts
  - input_model_dtype
  - input_name
  - input_names
  - input_nodes
  - input_shape
  - input_shapes
  - input_types
  - inputs_to_make_channel_last
  - inside_layer_modules
  - int4_accuracy_level
  - int4_block_size
  - int4_op_types_to_quantize
  - inter_thread_num_list
  - intra_thread_num_list
  - io_bind
  - is_symmetric
  - keep_io_types
  - layer_name_filter
  - layers_block_name
  - layers_pattern
  - layers_to_transform
  - lib_name
  - lib_targets
  - loftq_iter
  - logger
  - lora_dropout
  - make_inputs
  - max_finite_val
  - max_layer
  - merge_adapter_weights
  - metadata_only
  - metric
  - min_layer
  - min_positive_val
  - min_real_range
  - model_type
  - module_dropout
  - modules_to_fuse
  - modules_to_not_convert
  - modules_to_save
  - name_pattern
  - need_layer_fusing
  - no_repeat_ngram_size
  - node_block_list
  - nodes_to_exclude
  - nodes_to_include
  - nodes_to_quantize
  - num_epochs
  - num_heads
  - num_key_value_heads
  - num_splits
  - num_steps
  - only_onnxruntime
  - op_block_list
  - op_type_dict
  - op_types_to_exclude
  - op_types_to_quantize
  - opt_level
  - opt_level_list
  - optimization_options
  - optimize
  - optimize_model
  - optional_inputs
  - out_node
  - output_model
  - output_names
  - output_nodes
  - outputs_to_make_channel_last
  - outside_layer_modules
  - overrides_config
  - parallel_jobs
  - past_key_value_name
  - per_channel
  - percdamp
  - post
  - pre
  - precision
  - prepare_qdq_config
  - preset
  - provider_options
  - provider_options_list
  - providers_list
  - ptl_data_module
  - ptl_module
  - q_group_size
  - qconfig_func
  - quant_axes
  - quant_format
  - quant_level
  - quant_mode
  - quant_preprocess
  - quant_type
  - r
  - rank_dropout
  - rank_dropout_scale
  - rank_pattern
  - recipes
  - reduce_range
  - rotate_mode
  - round_interval
  - save_as_external_data
  - save_format
  - save_metadata_for_token_generation
  - save_quant_config
  - script_dir
  - search
  - seed
  - session_options
  - size_threshold
  - skip_onnx_shape
  - skip_optimization
  - skip_symbolic_shape
  - source_dtype
  - sparsity
  - split_assignments
  - static_groups
  - strict
  - surgeries
  - sym
  - target_device
  - target_dtype
  - target_modules
  - target_opset
  - tensor_quant_overrides
  - tool_command
  - tool_command_args
  - torch_dtype
  - train_data_config
  - training_args
  - training_loop_func
  - trt_fp16_enable
  - true_sequential
  - tuning_criterion
  - unique_embeds_lm_head_splits
  - use_dynamo_exporter
  - use_effective_conv2d
  - use_enhanced_quantizer
  - use_external_data_format
  - use_forced_decoder_ids
  - use_gpu
  - use_gqa
  - use_int4
  - use_logits_processor
  - use_prefix_vocab_mask
  - use_qdq
  - use_symbolic_shape_infer
  - use_temperature
  - use_transpose_op
  - use_vocab_mask
  - user_script
  - val_data_config
  - version
  - w_bit
  - weight_only_config
  - weight_only_quant_configs
  - weight_sharing
  - weight_symmetric
  - weight_type
  - WeightSymmetric
  - workspace
  - zero_point
- components
- compress_to_fp16
- compute_dtype
- context_length
- convert_attribute
  - command line option
- cost_model
- cpu_cores
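Several options that recur throughout this index (`save_as_external_data`, `all_tensors_to_one_file`, `external_data_name`, `size_threshold`, `convert_attribute`) govern how large ONNX initializers are written to external data files. The following is a minimal, hedged sketch of what those settings control, assuming they are ultimately forwarded to `onnx.save_model`; the model and file names are hypothetical placeholders, not values from this documentation.

```python
import onnx

# Hypothetical input path: load an existing ONNX model.
model = onnx.load("model.onnx")

# Save the model with its large tensors moved into an external data file.
onnx.save_model(
    model,
    "model_ext.onnx",                # hypothetical output path
    save_as_external_data=True,      # store large initializers outside the .onnx file
    all_tensors_to_one_file=True,    # write all external tensors into a single file
    location="model_ext.onnx.data",  # name of that external data file
    size_threshold=1024,             # only externalize tensors larger than 1024 bytes
    convert_attribute=False,         # leave attribute tensors embedded in the graph
)
```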