- calibrate_method
- calibration_batch_size
- calibration_data_config
- calibration_nsamples
- calibration_providers
- calibration_sampling_size
- checkpoint_path
- command line option
  - a_bits
  - a_per_token
  - a_symmetric
  - accuracy_level
  - activation_symmetric
  - activation_type
  - ActivationSymmetric
  - add_zero_point
  - AddQDQPairToWeight
  - algorithm
  - all_tensors_to_one_file
  - allow_tf32
  - alpha
  - alpha_pattern
  - approach
  - atol
  - backend
  - batch_size
  - binary_file
  - bits
  - block_size
  - block_to_split
  - blocksize
  - calibrate_method
  - calibration_batch_size
  - calibration_data_config
  - calibration_nsamples
  - calibration_providers
  - calibration_sampling_size
  - checkpoint_path
  - components
  - compress_to_fp16
  - compute_dtype
  - context_length
  - convert_attribute
  - cost_model
  - cpu_cores
  - damp_percent
  - data_config
  - decompose_both
  - decompose_factor
  - desc_act
  - device
  - device_map
  - dim_param
  - dim_value
  - disable_cpu_fallback
  - do_validate
  - domain
  - double_quant
  - duo_scaling
  - dynamic
  - dynamic_lora_r
  - element_wise_binary_ops
  - embed_context
  - enable_cuda_graph
  - enable_dpu
  - enable_htp
  - enable_profiling
  - ephemeral_gpu_offload
  - eval_data_config
  - example_input_func
  - exclude_embeds
  - exclude_lm_head
  - exclude_modules
  - excluded_precisions
  - execution_mode_list
  - export_compatible
  - external_data_name
  - extra_args
  - extra_configs
  - extra_options
  - extra_session_config
  - final_orientation
  - float16
  - force_evaluate_other_eps
  - force_fp16_inputs
  - force_fp32_nodes
  - force_fp32_ops
  - fp16
  - fuse_layernorm
  - gpus
  - group_size
  - hidden_size
  - htp_socs
  - ignored_scope
  - ignored_scope_type
  - init_weights
  - input
  - input_dim
  - input_int32
  - input_layouts
  - input_model_dtype
  - input_name
  - input_names
  - input_nodes
  - input_shape
  - input_shapes
  - input_types
  - inputs_to_make_channel_last
  - inside_layer_modules
  - int4_accuracy_level
  - int4_block_size
  - int4_op_types_to_quantize
  - inter_thread_num_list
  - intra_thread_num_list
  - io_bind
  - is_symmetric
  - keep_io_types
  - layer_name_filter
  - layers_block_name
  - layers_pattern
  - layers_to_transform
  - lib_name
  - lib_targets
  - loftq_iter
  - logger
  - lora_dropout
  - make_inputs
  - max_finite_val
  - max_layer
  - merge_adapter_weights
  - metadata_only
  - metric
  - min_layer
  - min_positive_val
  - min_real_range
  - model_type
  - module_dropout
  - modules_to_fuse
  - modules_to_not_convert
  - modules_to_save
  - name_pattern
  - need_layer_fusing
  - no_repeat_ngram_size
  - node_block_list
  - nodes_to_exclude
  - nodes_to_include
  - nodes_to_quantize
  - num_epochs
  - num_heads
  - num_key_value_heads
  - num_splits
  - num_steps
  - only_onnxruntime
  - op_block_list
  - op_type_dict
  - op_types_to_exclude
  - op_types_to_quantize
  - opt_level
  - opt_level_list
  - optimization_options
  - optimize
  - optimize_model
  - optional_inputs
  - out_node
  - output_model
  - output_names
  - output_nodes
  - outputs_to_make_channel_last
  - outside_layer_modules
  - overrides_config
  - parallel_jobs
  - past_key_value_name
  - per_channel
  - percdamp
  - post
  - pre
  - precision
  - prepare_qdq_config
  - preset
  - provider_options
  - provider_options_list
  - providers_list
  - ptl_data_module
  - ptl_module
  - q_group_size
  - qconfig_func
  - quant_axes
  - quant_format
  - quant_level
  - quant_mode
  - quant_preprocess
  - quant_type
  - r
  - rank_dropout
  - rank_dropout_scale
  - rank_pattern
  - recipes
  - reduce_range
  - rotate_mode
  - round_interval
  - save_as_external_data
  - save_format
  - save_metadata_for_token_generation
  - save_quant_config
  - script_dir
  - search
  - seed
  - session_options
  - size_threshold
  - skip_onnx_shape
  - skip_optimization
  - skip_symbolic_shape
  - source_dtype
  - sparsity
  - split_assignments
  - static_groups
  - strict
  - surgeries
  - sym
  - target_device
  - target_dtype
  - target_modules
  - target_opset
  - tensor_quant_overrides
  - tool_command
  - tool_command_args
  - torch_dtype
  - train_data_config
  - training_args
  - training_loop_func
  - trt_fp16_enable
  - true_sequential
  - tuning_criterion
  - unique_embeds_lm_head_splits
  - use_dynamo_exporter
  - use_effective_conv2d
  - use_enhanced_quantizer
  - use_external_data_format
  - use_forced_decoder_ids
  - use_gpu
  - use_gqa
  - use_int4
  - use_logits_processor
  - use_prefix_vocab_mask
  - use_qdq
  - use_symbolic_shape_infer
  - use_temperature
  - use_transpose_op
  - use_vocab_mask
  - user_script
  - val_data_config
  - version
  - w_bit
  - weight_only_config
  - weight_only_quant_configs
  - weight_sharing
  - weight_symmetric
  - weight_type
  - WeightSymmetric
  - workspace
  - zero_point
- components
- compress_to_fp16
- compute_dtype
- context_length
- convert_attribute
  - command line option
- cost_model
- cpu_cores
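Several options that recur throughout this index (`save_as_external_data`, `all_tensors_to_one_file`, `external_data_name`, `size_threshold`, `convert_attribute`) govern how large ONNX initializers are written to external data files. The following is a minimal, hedged sketch of what those settings control, assuming they are ultimately forwarded to `onnx.save_model`; the model and file names are hypothetical placeholders, not values from this documentation.

```python
import onnx

# Hypothetical input path: load an existing ONNX model.
model = onnx.load("model.onnx")

# Save the model with its large tensors moved into an external data file.
onnx.save_model(
    model,
    "model_ext.onnx",                # hypothetical output path
    save_as_external_data=True,      # store large initializers outside the .onnx file
    all_tensors_to_one_file=True,    # write all external tensors into a single file
    location="model_ext.onnx.data",  # name of that external data file
    size_threshold=1024,             # only externalize tensors larger than 1024 bytes
    convert_attribute=False,         # leave attribute tensors embedded in the graph
)
```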