#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
5"""
6Implementation of LlamaTune space adapter.
8LlamaTune is a technique that transforms the original parameter space into a
9lower-dimensional space to try and improve the sample efficiency of the underlying
10optimizer by making use of the inherent parameter sensitivity correlations in most
11systems.
13See Also: `LlamaTune: Sample-Efficient DBMS Configuration Tuning
14<https://www.microsoft.com/en-us/research/publication/llamatune-sample-efficient-dbms-configuration-tuning>`_.
15"""
import os
from typing import Dict, List, Optional, Union
from warnings import warn

import ConfigSpace
import ConfigSpace.exceptions
import numpy as np
import numpy.typing as npt
import pandas as pd
from ConfigSpace.hyperparameters import NumericalHyperparameter
from sklearn.preprocessing import MinMaxScaler

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import drop_nulls, normalize_config


class LlamaTuneAdapter(BaseSpaceAdapter):  # pylint: disable=too-many-instance-attributes
    """Implementation of LlamaTune, a set of parameter space transformation
    techniques aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by
    HeSBO projection.
    """

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
    """Default percentage of bias for each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default maximum number of unique values for each parameter, when space
    discretization is used.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        orig_parameter_space: ConfigSpace.ConfigurationSpace,
        num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
        special_param_values: Optional[dict] = None,
        max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
        use_approximate_reverse_mapping: bool = False,
    ):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
        special_param_values : Optional[dict]
            Dictionary of special parameter values to bias the optimizer towards.
            Each key is a parameter name; each value is a single special value,
            a (special value, biasing %) tuple, or a list of either.
        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter, used to discretize the parameter
            space. If `None`, space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether to attempt an approximate reverse mapping (via the pseudo-inverse
            of the projection matrix) when registering configurations that were not
            previously suggested by the optimizer.
        """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError(
                "Number of target config space dimensions should be "
                "less than those of original config space."
            )

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        # Initialize config values scaler: from [-1, 1] to [0, 1] range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(len(list(self.orig_parameter_space.values())))
        config_scaler.fit([-ones_vector, ones_vector])
        self._config_scaler = config_scaler

        # Generate random mapping from low-dimensional space to original config space
        num_orig_dims = len(list(self.orig_parameter_space.values()))
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)
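
        # NOTE: under HeSBO, each original dimension i is tied to a randomly chosen
        # low dimension h[i] with a random sign sigma[i], so that
        # x_orig[i] = sigma[i] * x_low[h[i]] (see _transform below); i.e., the
        # projection matrix has exactly one +/-1 non-zero entry per row.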

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
        target_configurations = []
        for _, config in configurations.astype("O").iterrows():
            configuration = ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=drop_nulls(config.to_dict()),
            )

            target_config = self._suggested_configs.get(configuration, None)
            # NOTE: HeSBO is a non-linear projection method, and does not inherently
            # support inverse projection.
            # To (partly) support this operation, we keep track of the suggested
            # low-dim point(s) along with the respective high-dim point; this way we
            # can retrieve the low-dim point from its high-dim counterpart.
            if target_config is None:
                # Registering points that were not suggested by the optimizer is
                # not inherently supported.
                if configuration == self.orig_parameter_space.get_default_configuration():
                    # Default configuration should always be registerable.
                    pass
                elif not self._use_approximate_reverse_mapping:
                    raise ValueError(
                        f"{repr(configuration)}\n"
                        "The above configuration was not suggested by the optimizer. "
                        "Approximate reverse mapping is currently disabled; "
                        "thus *only* configurations suggested "
                        "previously by the optimizer can be registered."
                    )
                # else ...
                target_config = self._try_inverse_transform_config(configuration)

            target_configurations.append(target_config)

        return pd.DataFrame(
            target_configurations,
            columns=list(self.target_parameter_space.keys()),
        )

    def _try_inverse_transform_config(
        self,
        config: ConfigSpace.Configuration,
    ) -> ConfigSpace.Configuration:
        """
        Attempts to generate an inverse mapping of the given configuration that wasn't
        previously registered.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration in the original high-dimensional space.

        Returns
        -------
        ConfigSpace.Configuration
            Configuration in the low-dimensional space.

        Raises
        ------
        ValueError
            On conversion errors.
        """
        # ...yet, we try to support that by implementing an approximate
        # reverse mapping using the pseudo-inverse matrix.
        if getattr(self, "_pinv_matrix", None) is None:
            self._try_generate_approx_inverse_mapping()

        # Replace NaNs with zeros for inactive hyperparameters
        config_vector = np.nan_to_num(config.get_array(), nan=0.0)
        # Perform approximate reverse mapping
        # NOTE: applying special value biasing is not possible
        vector: npt.NDArray = self._config_scaler.inverse_transform([config_vector])[0]
        target_config_vector: npt.NDArray = self._pinv_matrix.dot(vector)
        # Clip values to the [-1, 1] range of the low-dimensional space.
        for idx, value in enumerate(target_config_vector):
            target_config_vector[idx] = np.clip(value, -1, 1)
        if self._q_scaler is not None:
            # If max_unique_values_per_param is set, we need to scale
            # the low-dimensional space back to the discretized space as well.
            target_config_vector = self._q_scaler.inverse_transform([target_config_vector])[0]
            assert isinstance(target_config_vector, np.ndarray)
            # Clip values to the [1, max_value] range (floating point errors may occur).
            for idx, value in enumerate(target_config_vector):
                target_config_vector[idx] = int(np.clip(value, 1, self._q_scaler.data_max_[idx]))
            target_config_vector = target_config_vector.astype(int)
        # Convert the vector to a dictionary.
        target_config_dict = dict(
            zip(
                self.target_parameter_space.keys(),
                target_config_vector,
            )
        )
        target_config = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_config_dict,
            # Using the vector=... kwarg results in hyperparameter type conversion
            # issues (e.g., float instead of int), so we use the values dict instead.
            # vector=target_config_vector,
        )

        # Check to see if the approximate reverse mapping looks OK.
        # Note: we know this isn't 100% accurate, so this is just a warning and
        # mostly meant for internal debugging.
        configuration_dict = dict(config)
        double_checked_config = self._transform(dict(target_config))
        double_checked_config = {
            # Skip the special values that aren't in the original space.
            k: v
            for k, v in double_checked_config.items()
            if k in configuration_dict
        }
        if double_checked_config != configuration_dict and (
            os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
        ):
            warn(
                (
                    f"Note: Configuration {configuration_dict} was inverse transformed to "
                    f"{dict(target_config)} and then back to {double_checked_config}. "
                    "This is an approximate reverse mapping for previously unregistered "
                    "configurations, so this is just a warning."
                ),
                UserWarning,
            )

        # But the inverse mapping should at least be valid in the target space.
        try:
            ConfigSpace.Configuration(
                self.target_parameter_space,
                values=target_config,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {target_config} generated by "
                f"inverse mapping of {config}:\n{e}"
            ) from e

        return target_config
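
    # A usage sketch for transform() (hedged; the `adapter` name is illustrative --
    # low-dim columns follow the "dim_{idx}" naming from _construct_low_dim_space):
    #
    #     low_dim_config = adapter.target_parameter_space.sample_configuration()
    #     low_dim_df = pd.DataFrame([dict(low_dim_config)])
    #     orig_df = adapter.transform(low_dim_df)  # single row in the original space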

    def transform(self, configuration: pd.DataFrame) -> pd.DataFrame:
        if len(configuration) != 1:
            raise ValueError(
                "Configuration dataframe must contain exactly 1 row. "
                f"Found {len(configuration)} rows."
            )

        target_values_dict = configuration.iloc[0].to_dict()
        target_configuration = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_values_dict,
        )

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Validate that the configuration is in the original space.
        try:
            ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=orig_configuration,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as e:
            raise ValueError(
                f"Invalid configuration {orig_configuration} generated by "
                f"transformation of {target_configuration}:\n{e}"
            ) from e

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        return pd.DataFrame(
            [list(orig_configuration.values())], columns=list(orig_configuration.keys())
        )

    def _construct_low_dim_space(
        self,
        num_low_dims: int,
        max_unique_values_per_param: Optional[int],
    ) -> None:
        """
        Constructs the low-dimensional parameter (potentially discretized) search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter, used to discretize the parameter
            space. If `None`, space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        hyperparameters: List[
            Union[ConfigSpace.UniformFloatHyperparameter, ConfigSpace.UniformIntegerHyperparameter]
        ]
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized
            # space (like ConfigSpace does using the `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional
            # space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to the [-1, 1]
            # range, used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(
                    name=f"dim_{idx}",
                    lower=1,
                    upper=max_unique_values_per_param,
                )
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler:
            # from [1, max_unique_values_per_param] to [-1, 1] range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit([ones_vector, max_value_vector])
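            # E.g., with max_unique_values_per_param=10000, a suggested value of 1
            # is scaled to -1, 10000 to +1, and ~5000 to ~0.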

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        # use same random state as in original parameter space
        config_space.random = self._random_state
        config_space.add(hyperparameters)
        self._target_config_space = config_space

    def _transform(self, configuration: dict) -> dict:
        """
        Projects a low-dimensional point (configuration) to the high-dimensional
        original parameter space, and then biases the resulting parameter values
        towards their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform([low_dim_config_values])[0]

        # Project low-dim point to original parameter space
        original_config_values = [
            self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]]
            for idx in range(len(original_parameters))
        ]
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform([original_config_values])[0]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1].
            # NOTE: HeSBO projection ensures this theoretically, but floating-point
            # op nuances mean it is not always guaranteed in practice.
            value = np.clip(norm_value, 0, 1)

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))  # truncate integer part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param.to_value(value)
                orig_value = np.clip(orig_value, param.lower, param.upper)
            else:
                raise NotImplementedError(
                    "Only Categorical, Integer, and Float hyperparameters are currently supported."
                )

            original_config[param.name] = orig_value

        return original_config

    def _special_param_value_scaler(
        self,
        param: NumericalHyperparameter,
        input_value: float,
    ) -> float:
        """
        Biases the special value(s) of this parameter by shifting the normalized
        `input_value` towards those.

        Parameters
        ----------
        param : NumericalHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
        """
        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value
        perc_sum = 0.0
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                return float(param.to_vector(special_value))

        # Scale input value uniformly to non-special values
        return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum)))

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """
        Checks that the user-provided dict of special parameter values is valid, and
        assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.

        Raises
        ------
        ValueError: if a dictionary key, value, or its structure is invalid.
        NotImplementedError: if a special value is defined for a non-integer parameter.
        """
        error_prefix = "Validation of special parameter values dict failed. "

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(
                    error_prefix + f"Parameter '{param}' is not supported. "
                    "Only Integer Hyperparameters are currently supported."
                )

            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both special value and biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies list of special values
                    tuple_list = [
                        (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
                    ]
                elif all(
                    isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
                ):
                    # User specifies list of tuples; each tuple defines the special
                    # value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(
                        error_prefix + f"Invalid format in value list for parameter '{param}'. "
                        "Special value list should contain either integers, "
                        "or (special value, biasing %) tuples."
                    )
            else:
                raise ValueError(
                    error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                    "an int, an (int, float) tuple, a list of integers, "
                    "or a list of (int, float) tuples."
                )

            # Are user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are outside of parameter "
                    + f"'{param}' value domain."
                )
            # Are user-provided special values unique?
            if len(set(v for v, _ in tuple_list)) != len(tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are defined more than once "
                    + f"for parameter '{param}'."
                )
            # Are biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(
                    error_prefix
                    + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                    "i.e., they fall outside the (0, 1) range."
                )

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.0:
                raise ValueError(
                    error_prefix
                    + f"Total special values percentage for parameter '{param}' surpasses 100%."
                )
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(
                    f"Total special values percentage for parameter '{param}' exceeds 50%.",
                    UserWarning,
                )

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping:
        i.e., from the high-dimensional space to the low-dimensional one.

        The reverse mapping is generated using the pseudo-inverse of the original
        HeSBO projection matrix.
        This mapping can potentially be used to register configurations that were
        *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that
        it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import (  # pylint: disable=import-outside-toplevel
            LinAlgError,
            pinv,
        )

        warn(
            (
                "Trying to register a configuration that was not "
                "previously suggested by the optimizer.\n"
                "This inverse configuration transformation is typically not supported.\n"
                "However, we will try to register this configuration "
                "using an *experimental* method."
            ),
            UserWarning,
        )

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct dense projection matrix from sparse repr
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        for row, col in enumerate(self._h_matrix):
            proj_matrix[row][col] = self._sigma_vector[row]
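
        # NOTE: proj_matrix P maps low-dim points to the high-dim space
        # (x_high = P @ x_low); its Moore-Penrose pseudo-inverse P+ yields the
        # least-squares solution x_low = P+ @ x_high, which is why this reverse
        # mapping is only approximate.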

        # Compute pseudo-inverse matrix
        try:
            self._pinv_matrix = pinv(proj_matrix)
        except LinAlgError as err:
            raise RuntimeError(
                f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}"
            ) from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)