-
calibrate_method
-
calibration_batch_size
-
calibration_data_config
-
calibration_nsamples
-
calibration_sampling_size
-
checkpoint_path
-
command line option
- a_asym
- a_bits
- a_clip_ratio
- a_groupsize
- accuracy_level
- activation_type, [1], [2]
- ActivationSymmetric, [1], [2], [3]
- add_zero_point
- AddQDQPairToWeight
- algorithm
- all_tensors_to_one_file, [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]
- allow_tf32, [1], [2]
- append_first_op_types_to_quantize_list, [1], [2]
- approach, [1], [2]
- atol
- azureml_client, [1], [2]
- backend, [1], [2], [3]
- binary_file
- bits
- block_size
- block_to_split
- blocksize
- calibrate_method, [1], [2]
- calibration_batch_size, [1]
- calibration_data_config, [1]
- calibration_nsamples, [1]
- calibration_sampling_size, [1]
- checkpoint_path
- components
- compress_to_fp16
- compute_dtype, [1]
- convert_attribute, [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]
- cost_model
- cpu_cores
- damp_percent
- data_config, [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12]
- datastore_name
- datastore_url
- desc_act
- device, [1], [2], [3], [4], [5], [6]
- dim_param
- dim_value
- do_validate
- domain, [1], [2]
- double_quant
- duo_scaling
- dynamic
- dynamic_lora_r
- element_wise_binary_ops
- enable_cuda_graph, [1]
- enable_dpu
- enable_htp
- enable_profiling
- EnableSubgraph, [1], [2]
- eval_data_config, [1], [2]
- example_input_func
- exclude_embeds, [1]
- exclude_lm_head, [1]
- excluded_precisions, [1], [2]
- execution_mode_list
- export_compatible
- external_data_name, [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]
- extra.Sigmoid.nnapi, [1], [2]
- extra_args, [1], [2], [3], [4]
- extra_configs, [1]
- extra_options, [1], [2], [3]
- extra_session_config
- final_orientation
- fixed_point
- float16, [1]
- force_evaluate_other_eps
- force_fp16_inputs
- force_fp32_nodes
- force_fp32_ops
- ForceQuantizeNoInputCheck, [1], [2]
- fp16, [1]
- fuse_layernorm
- gptq_damping
- gptq_opt_scales
- gpus
- group
- group_size
- hidden_size
- htp_socs
- ignored_scope
- ignored_scope_type
- input
- input_dim
- input_int32
- input_layouts
- input_model_dtype, [1]
- input_name
- input_names
- input_nodes
- input_shape
- input_shapes
- input_types
- inputs_to_make_channel_last
- inside_layer_modules
- int4_accuracy_level
- int4_block_size
- inter_thread_num_list
- intra_thread_num_list
- io_bind
- is_symmetric
- job_name
- k_bits
- k_clip_ratio
- k_groupsize
- keep_io_types, [1]
- layer_name_filter, [1]
- layers_block_name
- lib_name
- lib_targets
- loftq_iter
- logger
- lora_alpha, [1], [2]
- lora_dropout, [1], [2]
- lora_r, [1], [2]
- make_inputs
- MatMulConstBOnly, [1], [2]
- max_finite_val
- max_layer, [1]
- merge_adapter_weights
- metadata_only
- metric, [1], [2]
- min_layer, [1]
- min_positive_val
- model_type, [1]
- modules_to_fuse
- modules_to_not_convert
- modules_to_save, [1], [2]
- multivariate
- name, [1]
- name_pattern
- need_layer_fusing
- no_repeat_ngram_size
- node_block_list
- nodes_to_exclude, [1], [2], [3], [4]
- nodes_to_quantize, [1], [2], [3]
- num_epochs
- num_heads
- num_key_value_heads
- num_samples, [1]
- num_splits
- num_steps
- only_onnxruntime
- onnxoptimizer
- op_block_list, [1]
- op_type_dict, [1], [2]
- op_types_to_quantize, [1], [2], [3]
- opt_level
- opt_level_list
- optimization_options
- optimize_model
- optional_inputs
- out_node
- output_model
- output_name
- output_names
- output_nodes
- outputs_to_make_channel_last
- outside_layer_modules
- overrides_config
- parallel_jobs
- passes
- past_key_value_name
- path, [1]
- per_channel, [1], [2], [3]
- percdamp
- post
- pre
- precision
- prepare_qnn_config, [1]
- preset
- provider_options_list
- providers_list
- ptl_data_module
- ptl_module
- q_group_size
- qconfig_func
- qnn_extra_options, [1]
- quant_format, [1], [2], [3], [4]
- quant_level, [1], [2]
- quant_mode, [1], [2], [3]
- quant_preprocess, [1], [2], [3]
- quant_type
- recipes, [1], [2]
- reduce_range, [1], [2], [3], [4], [5]
- relative_path, [1]
- rotate
- rotation_seed
- round_interval
- s_bits
- save_as_external_data, [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]
- save_format
- save_metadata_for_token_generation
- save_quant_config, [1]
- script_dir, [1], [2], [3], [4], [5], [6], [7]
- search
- seed, [1], [2], [3], [4]
- size_threshold, [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]
- source_dtype
- sparsity, [1]
- static_groups
- strict
- sym
- target_device, [1]
- target_dtype
- target_modules
- target_opset, [1], [2], [3], [4]
- tool_command
- tool_command_args
- torch_dtype, [1], [2], [3]
- train_data_config, [1], [2], [3]
- training_args, [1], [2]
- training_loop_func
- trt_fp16_enable
- true_sequential
- tuning_criterion, [1], [2]
- use_dynamo_exporter
- use_enhanced_quantizer
- use_external_data_format
- use_forced_decoder_ids
- use_gpu, [1]
- use_gqa
- use_int4
- use_logits_processor
- use_prefix_vocab_mask
- use_symbolic_shape_infer
- use_temperature
- use_transpose_op
- use_vocab_mask
- user_script, [1], [2], [3], [4], [5], [6], [7]
- v_bits
- v_clip_ratio
- v_groupsize
- val_data_config
- version, [1]
- w_asym
- w_bit
- w_bits
- w_gptq
- w_groupsize
- w_rtn
- weight_only_config, [1], [2]
- weight_only_quant_configs
- weight_type, [1], [2], [3]
- WeightSymmetric, [1], [2], [3]
- with_replacement
- workspace, [1], [2]
- zero_point
|
-
components
- CompositeModelHandler (class in olive.model)
-
compress_to_fp16
-
compute_dtype
- conda_file_path (olive.systems.azureml.AzureMLDockerConfig attribute)
- config (olive.systems.system_config.SystemConfig attribute)
-
convert_attribute
- command line option, [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24]
-
cost_model
- CPU (olive.hardware.accelerator.Device attribute)
-
cpu_cores
- CPU_SPR (olive.hardware.accelerator.Device attribute)
- create_client() (olive.azureml.azureml_client.AzureMLClientConfig method)
- create_evaluator() (olive.evaluator.olive_evaluator.OliveEvaluatorConfig method)
- create_registry_client() (olive.azureml.azureml_client.AzureMLClientConfig method)
- create_system() (olive.systems.system_config.SystemConfig method)
- create_training_args() (olive.passes.pytorch.lora.HFTrainingArguments method)
- CUSTOM (olive.evaluator.metric.MetricType attribute)
|