#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Implementation of LlamaTune space adapter.

LlamaTune is a technique that transforms the original parameter space into a
lower-dimensional space to try and improve the sample efficiency of the underlying
optimizer by making use of the inherent parameter sensitivity correlations in most
systems.

See Also: `LlamaTune: Sample-Efficient DBMS Configuration Tuning
<https://www.microsoft.com/en-us/research/publication/llamatune-sample-efficient-dbms-configuration-tuning>`_.
"""
import os
from importlib.metadata import version
from typing import Any
from warnings import warn

import ConfigSpace
import ConfigSpace.exceptions
import numpy as np
import numpy.typing as npt
import pandas as pd
from ConfigSpace.hyperparameters import NumericalHyperparameter
from packaging.version import Version
from sklearn.preprocessing import MinMaxScaler

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import normalize_config

_NUMPY_VERS = Version(version("numpy"))


class LlamaTuneAdapter(BaseSpaceAdapter):  # pylint: disable=too-many-instance-attributes
    """Implementation of LlamaTune, a set of parameter space transformation techniques,
    aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by
    HeSBO projection.
    """

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
    """Default percentage of bias for each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default maximum number of unique values per parameter, when space
    discretization is used.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        orig_parameter_space: ConfigSpace.ConfigurationSpace,
        num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
        special_param_values: dict | None = None,
        max_unique_values_per_param: int | None = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
        use_approximate_reverse_mapping: bool = False,
    ):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
        special_param_values : dict | None
            Dictionary of special parameter values.
            Each key is the name of a parameter, and the value is either:
            - an integer (special value), or
            - a tuple of an integer and a float (special integer value and biasing percentage)
        max_unique_values_per_param : int | None
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None`, space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether to use an approximate reverse mapping to help register
            configurations during resume.
        """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError(
                "Number of target config space dimensions should be "
                "less than those of original config space."
            )

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        # Initialize config values scaler: from (-1, 1) to (0, 1) range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(len(list(self.orig_parameter_space.values())))
        config_scaler.fit(np.array([-ones_vector, ones_vector]))
        self._config_scaler = config_scaler
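        # e.g., after fitting on the two extreme vectors above, a projected value of
        # -1.0 maps to 0.0, 0.0 maps to 0.5, and 1.0 maps to 1.0 in every dimension.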

        # Generate random mapping from low-dimensional space to original config space
        num_orig_dims = len(list(self.orig_parameter_space.values()))
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)
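        # HeSBO stores the projection sparsely: h_matrix[i] names the single low
        # dimension that original dimension i copies, and sigma_vector[i] is its sign.
        # e.g., h = [1, 0, 1] with sigma = [-1, 1, 1] means original dim 0 mirrors
        # (negated) low dim 1, dim 1 copies low dim 0, and dim 2 copies low dim 1.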

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configuration: pd.Series) -> pd.Series:
        config = ConfigSpace.Configuration(
            self.orig_parameter_space,
            values=configuration.dropna().to_dict(),
        )

        target_config = self._suggested_configs.get(config, None)
        # NOTE: HeSBO is a non-linear projection method, and does not inherently
        # support inverse projection.
        # To (partly) support this operation, we keep track of the suggested
        # low-dim point(s) along with the respective high-dim point; this way we
        # can retrieve the low-dim point from its high-dim counterpart.
        if target_config is None:
            # Inherently it is not supported to register points that were not
            # suggested by the optimizer.
            if config == self.orig_parameter_space.get_default_configuration():
                # Default configuration should always be registerable.
                pass
            elif not self._use_approximate_reverse_mapping:
                raise ValueError(
                    f"{repr(config)}\n"
                    "The above configuration was not suggested by the optimizer. "
                    "Approximate reverse mapping is currently disabled; "
                    "thus *only* configurations suggested "
                    "previously by the optimizer can be registered."
                )

            target_config = self._try_inverse_transform_config(config)

        return pd.Series(target_config, index=list(self.target_parameter_space.keys()))

    def _try_inverse_transform_config(
        self,
        config: ConfigSpace.Configuration,
    ) -> ConfigSpace.Configuration:
        """
        Attempts to generate an inverse mapping of the given configuration that wasn't
        previously registered.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration in the original high-dimensional space.

        Returns
        -------
        ConfigSpace.Configuration
            Configuration in the low-dimensional space.

        Raises
        ------
        ValueError
            On conversion errors.
        """
        # ...yet, we try to support that by implementing an approximate
        # reverse mapping using the pseudo-inverse matrix.
        if getattr(self, "_pinv_matrix", None) is None:
            self._try_generate_approx_inverse_mapping()

        # Replace NaNs with zeros for inactive hyperparameters
        config_vector = np.nan_to_num(config.get_array(), nan=0.0)
        # Perform approximate reverse mapping
        # NOTE: applying special value biasing is not possible
        vector: npt.NDArray = self._config_scaler.inverse_transform(np.array([config_vector]))[0]
        target_config_vector: npt.NDArray = self._pinv_matrix.dot(vector)
        # Clip values to the [-1, 1] range of the low-dimensional space.
        for idx, value in enumerate(target_config_vector):
            target_config_vector[idx] = np.clip(value, -1, 1)
        if self._q_scaler is not None:
            # If max_unique_values_per_param is set, we need to scale
            # the low-dimensional space back to the discretized space as well.
            target_config_vector = self._q_scaler.inverse_transform(
                np.array([target_config_vector])
            )[0]
            assert isinstance(target_config_vector, np.ndarray)
            # Clip values to the [1, max_value] range (floating point errors may occur).
            for idx, value in enumerate(target_config_vector):
                target_config_vector[idx] = int(np.clip(value, 1, self._q_scaler.data_max_[idx]))
            target_config_vector = target_config_vector.astype(int)
        # Convert the vector to a dictionary.
        target_config_dict = dict(
            zip(
                self.target_parameter_space.keys(),
                target_config_vector,
            )
        )
        target_config = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_config_dict,
            # This method results in hyperparameter type conversion issues
            # (e.g., float instead of int), so we use the values dict instead.
            # vector=target_config_vector,
        )

        # Check to see if the approximate reverse mapping looks OK.
        # Note: we know this isn't 100% accurate, so this is just a warning and
        # mostly meant for internal debugging.
        configuration_dict = dict(config)
        double_checked_config = self._transform(dict(target_config))
        double_checked_config = {
            # Skip the special values that aren't in the original space.
            k: v
            for k, v in double_checked_config.items()
            if k in configuration_dict
        }
        if double_checked_config != configuration_dict and (
            os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
        ):
            warn(
                (
                    f"Note: Configuration {configuration_dict} was inverse transformed to "
                    f"{dict(target_config)} and then back to {double_checked_config}. "
                    "This is an approximate reverse mapping for previously unregistered "
                    "configurations, so this is just a warning."
                ),
                UserWarning,
            )

        # But the inverse mapping should at least be valid in the target space.
        try:
            ConfigSpace.Configuration(
                self.target_parameter_space,
                values=target_config,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as err:
            raise ValueError(
                f"Invalid configuration {target_config} generated by "
                f"inverse mapping of {config}:\n{err}"
            ) from err

        return target_config

    def transform(self, configuration: pd.Series) -> pd.Series:
        target_values_dict = configuration.to_dict()
        target_configuration = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_values_dict,
        )

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Validate that the configuration is in the original space.
        try:
            ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=orig_configuration,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as err:
            raise ValueError(
                f"Invalid configuration {orig_configuration} generated by "
                f"transformation of {target_configuration}:\n{err}"
            ) from err

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        ret: pd.Series = pd.Series(
            list(orig_configuration.values()), index=list(orig_configuration.keys())
        )
        return ret

    def _construct_low_dim_space(
        self,
        num_low_dims: int,
        max_unique_values_per_param: int | None,
    ) -> None:
        """
        Constructs the low-dimensional (potentially discretized) parameter search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : int | None
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None`, space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        hyperparameters: list[
            ConfigSpace.UniformFloatHyperparameter | ConfigSpace.UniformIntegerHyperparameter
        ]
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized
            # space (like ConfigSpace does using the `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional
            # space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to the [-1, 1]
            # range, used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(
                    name=f"dim_{idx}",
                    lower=1,
                    upper=max_unique_values_per_param,
                )
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler:
            # from [1, max_unique_values_per_param] to (-1, 1) range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit(np.array([ones_vector, max_value_vector]))
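            # e.g., with max_unique_values_per_param=10000, a suggested integer 1
            # scales to -1.0 and 10000 scales to 1.0 before HeSBO projection.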

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        # use same random state as in original parameter space
        config_space.random = self._random_state
        config_space.add(hyperparameters)
        self._target_config_space = config_space

    def _transform(self, configuration: dict) -> dict:
        """
        Projects a low-dimensional point (configuration) to the high-dimensional
        original parameter space, and then biases the resulting parameter values
        towards their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform(np.array([low_dim_config_values]))[0]

        # Project low-dim point to original parameter space
        original_config_values = [
            self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]]
            for idx in range(len(original_parameters))
        ]
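        # Each original dimension simply copies (possibly sign-flipped) its assigned
        # low dimension: e.g., with h = [1, 0, 1] and sigma = [-1, 1, 1], the low-dim
        # point [0.5, -0.2] expands to [0.2, 0.5, -0.2].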
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform(np.array([original_config_values]))[
            0
        ]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1]
            # NOTE: HeSBO projection ensures that theoretically, but due to
            # floating point ops nuances this is not always guaranteed
            value = np.clip(norm_value, 0, 1)

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))  # truncate integer part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
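                # e.g., with 4 choices, value=0.55 selects index int(0.55 * 4) = 2;
                # only value=1.0 would overflow to index 4, hence the clamp above.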
            elif isinstance(param, NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param.to_value(value)
                orig_value = np.clip(orig_value, param.lower, param.upper)

                if _NUMPY_VERS >= Version("2.0"):
                    # Convert numpy types to native Python types (e.g., np.int64 to int).
                    # This was performed automatically in NumPy<2.0, but not anymore.
                    # See https://numpy.org/doc/stable/reference/generated/numpy.can_cast.html
                    orig_value = orig_value.item()

            else:
                raise NotImplementedError(
                    "Only Categorical, Integer, and Float hyperparameters are currently supported."
                )

            original_config[param.name] = orig_value

        return original_config

    def _special_param_value_scaler(
        self,
        param: NumericalHyperparameter,
        input_value: float,
    ) -> float:
        """
        Biases the special value(s) of this parameter, by shifting the normalized
        `input_value` towards those.

        Parameters
        ----------
        param : NumericalHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
        """
        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value
        perc_sum = 0.0
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                return float(param.to_vector(special_value))

        # Scale input value uniformly to non-special values
        return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum)))

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """
        Checks that the user-provided dict of special parameter values is valid, and
        assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.

        Raises
        ------
        ValueError: if a dictionary key, value, or structure is invalid.
        NotImplementedError: if a special value is defined for a non-integer parameter.
        """
        error_prefix = "Validation of special parameter values dict failed. "

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(
                    error_prefix + f"Parameter '{param}' is not supported. "
                    "Only Integer Hyperparameters are currently supported."
                )

            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both special value and biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies list of special values
                    tuple_list = [
                        (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
                    ]
                elif all(
                    isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
                ):
                    # User specifies list of tuples; each tuple defines the special
                    # value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(
                        error_prefix + f"Invalid format in value list for parameter '{param}'. "
                        "Special value list should contain either integers, "
                        "or (special value, biasing %) tuples."
                    )
            else:
                raise ValueError(
                    error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                    "an int, an (int, float) tuple, a list of integers, "
                    "or a list of (int, float) tuples."
                )

            # Are user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are outside of parameter "
                    + f"'{param}' value domain."
                )
            # Are user-provided special values unique?
            if len({v for v, _ in tuple_list}) != len(tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are defined more than once "
                    + f"for parameter '{param}'."
                )
            # Are biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(
                    error_prefix
                    + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                    "i.e., they fall outside the (0, 1) range."
                )

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.0:
                raise ValueError(
                    error_prefix
                    + f"Total special values percentage for parameter '{param}' surpasses 100%."
                )
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(
                    f"Total special values percentage for parameter '{param}' exceeds 50%.",
                    UserWarning,
                )

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping:
        i.e., from the high-dimensional space to the low-dimensional one.

        The reverse mapping is generated using the pseudo-inverse of the original
        HeSBO projection matrix.
        This mapping can potentially be used to register configurations that were
        *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that
        it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import (  # pylint: disable=import-outside-toplevel
            LinAlgError,
            pinv,
        )

        warn(
            (
                "Trying to register a configuration that was not "
                "previously suggested by the optimizer.\n"
                "This inverse configuration transformation is typically not supported.\n"
                "However, we will try to register this configuration "
                "using an *experimental* method."
            ),
            UserWarning,
        )

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct dense projection matrix from its sparse representation
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        for row, col in enumerate(self._h_matrix):
            proj_matrix[row][col] = self._sigma_vector[row]
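        # e.g., h = [1, 0, 1] with sigma = [-1, 1, 1] yields the 3x2 dense matrix
        # [[0, -1], [1, 0], [0, 1]]; its Moore-Penrose pseudo-inverse then maps
        # high-dim vectors (approximately) back onto the low-dim space.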

        # Compute pseudo-inverse matrix
        try:
            _inv = pinv(proj_matrix)
            assert _inv is not None and not isinstance(_inv, tuple)
            inv_matrix: npt.NDArray[np.floating[Any]] = _inv
            self._pinv_matrix = inv_matrix
        except LinAlgError as err:
            raise RuntimeError(
                f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}"
            ) from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)