Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 97%
155 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-05 00:36 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-05 00:36 +0000
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Implementation of LlamaTune space adapter.
"""
8from typing import Dict, Optional
9from warnings import warn
11import ConfigSpace
12import numpy as np
13import numpy.typing as npt
14import pandas as pd
15from sklearn.preprocessing import MinMaxScaler
17from mlos_core.util import normalize_config
18from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
class LlamaTuneAdapter(BaseSpaceAdapter):   # pylint: disable=too-many-instance-attributes
    """
    Implementation of LlamaTune, a set of parameter space transformation techniques,
    aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by HeSBO projection"""

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = .2
    """Default percentage of bias for each special parameter value"""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default number of (max) unique values of each parameter, when space discretization is used"""

    def __init__(self, *,
                 orig_parameter_space: ConfigSpace.ConfigurationSpace,
                 num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
                 special_param_values: Optional[dict] = None,
                 max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
                 use_approximate_reverse_mapping: bool = False):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
            Must be strictly smaller than the number of parameters in `orig_parameter_space`.
        special_param_values : Optional[dict]
            Dictionary mapping a parameter name to its special value(s). Each dict value may be
            an int, an (int, float) tuple of (special value, biasing percentage), a list of ints,
            or a list of such tuples. `None` is treated as an empty dict.
        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None` space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether configurations that were *not* previously suggested by the optimizer may be
            registered, using an (experimental) pseudo-inverse based reverse mapping.

        Raises
        ------
        ValueError
            If `num_low_dims` is not smaller than the number of original parameters,
            or if `special_param_values` fails validation.
        NotImplementedError
            If a special value is defined for a non-integer parameter.
        """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError("Number of target config space dimensions should be less than those of original config space.")

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        num_orig_dims = len(list(self.orig_parameter_space.values()))

        # Initialize config values scaler: from (-1, 1) to (0, 1) range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(num_orig_dims)
        config_scaler.fit([-ones_vector, ones_vector])
        self._config_scaler = config_scaler

        # Generate random mapping from low-dimensional space to original config space.
        # NOTE: the order of these two draws determines the generated mapping for a given random state.
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        # Declared only; populated lazily by `_try_generate_approx_inverse_mapping`.
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
        """Maps configurations of the original space back to the low-dimensional target space.

        Configurations that were previously produced by `transform` are looked up directly.
        Any other configuration is mapped using the approximate (pseudo-inverse) reverse mapping,
        which is allowed only for the default configuration, or when approximate reverse mapping
        has been enabled.

        Parameters
        ----------
        configurations : pd.DataFrame
            Configurations in the original parameter space; one row per configuration.

        Returns
        -------
        configurations : pd.DataFrame
            The corresponding configurations in the target parameter space.

        Raises
        ------
        ValueError
            If a configuration was not previously suggested, is not the default one,
            and approximate reverse mapping is disabled.
        """
        target_configurations = []
        for (_, config) in configurations.astype('O').iterrows():
            configuration = ConfigSpace.Configuration(
                self.orig_parameter_space, values=config.to_dict())

            target_config = self._suggested_configs.get(configuration, None)
            # NOTE: HeSBO is a non-linear projection method, and does not inherently support inverse projection
            # To (partly) support this operation, we keep track of the suggested low-dim point(s) along with the
            # respective high-dim point; this way we can retrieve the low-dim point, from its high-dim counterpart.
            if target_config is None:
                # Inherently it is not supported to register points, which were not suggested by the optimizer.
                if configuration == self.orig_parameter_space.get_default_configuration():
                    # Default configuration should always be registerable.
                    pass
                elif not self._use_approximate_reverse_mapping:
                    raise ValueError(f"{repr(configuration)}\n" "The above configuration was not suggested by the optimizer. "
                                     "Approximate reverse mapping is currently disabled; thus *only* configurations suggested "
                                     "previously by the optimizer can be registered.")

                # ...yet, we try to support that by implementing an approximate reverse mapping using pseudo-inverse matrix.
                if getattr(self, '_pinv_matrix', None) is None:
                    self._try_generate_approx_inverse_mapping()

                # Replace NaNs with zeros for inactive hyperparameters
                config_vector = np.nan_to_num(configuration.get_array(), nan=0.0)
                # Perform approximate reverse mapping
                # NOTE: applying special value biasing is not possible
                vector = self._config_scaler.inverse_transform([config_vector])[0]
                # The pseudo-inverse may slightly overshoot; keep the point inside the [-1, 1] HeSBO range.
                target_config_vector = np.clip(self._pinv_matrix.dot(vector), -1., 1.)

                if self._q_scaler is not None:
                    # Target space is discretized: map [-1, 1] back to the integer range [1, max_value],
                    # i.e., the inverse of the scaling applied in `_transform`.
                    discretized_vector = self._q_scaler.inverse_transform([target_config_vector])[0]
                    discretized_vector = np.clip(
                        np.rint(discretized_vector), self._q_scaler.data_min_, self._q_scaler.data_max_)
                    target_values = [int(value) for value in discretized_vector]
                else:
                    target_values = [float(value) for value in target_config_vector]

                # Build the configuration from actual parameter values (not from ConfigSpace's internal vector
                # representation), as the computed values are expressed in the target space value domain.
                target_config = ConfigSpace.Configuration(
                    self.target_parameter_space,
                    values=dict(zip(self.target_parameter_space.keys(), target_values)))

            target_configurations.append(target_config)

        return pd.DataFrame(target_configurations, columns=list(self.target_parameter_space.keys()))

    def transform(self, configuration: pd.DataFrame) -> pd.DataFrame:
        """Projects a single target-space configuration to the original parameter space.

        The (original, target) configuration pair is recorded, so that the original configuration
        can later be mapped back by `inverse_transform`.

        Parameters
        ----------
        configuration : pd.DataFrame
            Dataframe with exactly one row: a configuration in the target parameter space.

        Returns
        -------
        configuration : pd.DataFrame
            Single-row dataframe holding the configuration in the original parameter space.

        Raises
        ------
        ValueError
            If the dataframe does not contain exactly one row.
        """
        if len(configuration) != 1:
            raise ValueError("Configuration dataframe must contain exactly 1 row. "
                             f"Found {len(configuration)} rows.")

        target_values_dict = configuration.iloc[0].to_dict()
        target_configuration = ConfigSpace.Configuration(self.target_parameter_space, values=target_values_dict)

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        return pd.DataFrame([list(orig_configuration.values())], columns=list(orig_configuration.keys()))

    def _construct_low_dim_space(self, num_low_dims: int, max_unique_values_per_param: Optional[int]) -> None:
        """Constructs the low-dimensional parameter (potentially discretized) search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param: Optional[int]:
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None` space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f'dim_{idx}', lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized space (like ConfigSpace does using `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to [-1, 1] range, used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(name=f'dim_{idx}', lower=1, upper=max_unique_values_per_param)
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler: from [1, max_unique_values_per_param] to (-1, 1) range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit([ones_vector, max_value_vector])

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        config_space.random = self._random_state    # use same random state as in original parameter space
        config_space.add_hyperparameters(hyperparameters)
        self._target_config_space = config_space

    def _transform(self, configuration: dict) -> dict:
        """Projects a low-dimensional point (configuration) to the high-dimensional original parameter space,
        and then biases the resulting parameter values towards their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.

        Raises
        ------
        NotImplementedError
            If the original space contains a parameter that is neither categorical nor numerical.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform([low_dim_config_values])[0]

        # Project low-dim point to original parameter space:
        # each original dimension reads one (signed) low-dimensional coordinate.
        original_config_values = [
            sign * low_dim_config_values[low_dim_idx]
            for sign, low_dim_idx in zip(self._sigma_vector, self._h_matrix)
        ]
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform([original_config_values])[0]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1]
            # NOTE: HeSBO projection ensures that theoretically but due to
            #       floating point ops nuances this is not always guaranteed
            value = max(0., min(1., norm_value))    # pylint: disable=redefined-loop-name

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))     # truncate integer part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, ConfigSpace.hyperparameters.NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param._transform(value)    # pylint: disable=protected-access
                orig_value = max(param.lower, min(param.upper, orig_value))
            else:
                raise NotImplementedError("Only Categorical, Integer, and Float hyperparameters are currently supported.")

            original_config[param.name] = orig_value

        return original_config

    def _special_param_value_scaler(self, param: ConfigSpace.UniformIntegerHyperparameter, input_value: float) -> float:
        """Biases the special value(s) of this parameter, by shifting the normalized `input_value` towards those.

        Parameters
        ----------
        param: ConfigSpace.UniformIntegerHyperparameter
            Parameter of the original parameter space.

        input_value: float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value: float
            Normalized value after special value(s) biasing is applied.
        """
        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value:
        # each special value owns a consecutive slice of [0, 1), as wide as its biasing percentage.
        perc_sum = 0.
        ret: float
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                ret = param._inverse_transform(special_value)   # pylint: disable=protected-access
                return ret

        # Scale input value uniformly to non-special values
        ret = param._inverse_transform(                                         # pylint: disable=protected-access
            param._transform_scalar((input_value - perc_sum) / (1 - perc_sum)))  # pylint: disable=protected-access
        return ret

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """Checks that the user-provided dict of special parameter values is valid.
        And assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict: dict
            User-provided dict of special parameter values.

        Raises
        ------
            ValueError: if dictionary key, valid, or structure is invalid.
            NotImplementedError: if special value is defined for a non-integer parameter
        """
        # Trailing space keeps the prefix separated from the specific message appended to it.
        error_prefix = "Validation of special parameter values dict failed. "

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(error_prefix + f"Parameter '{param}' is not supported. "
                                          "Only Integer Hyperparameters are currently supported.")

            # Normalize every accepted input format to a list of (special value, biasing percentage) tuples
            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both special value and biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies list of special values
                    tuple_list = [(v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value]
                elif all(isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value):
                    # User specifies list of tuples; each tuple defines the special value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(error_prefix + f"Invalid format in value list for parameter '{param}'. "
                                     "Special value list should contain either integers, or (special value, biasing %) tuples.")
            else:
                raise ValueError(error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                                 "an int, a (int, float) tuple, a list of integers, or a list of (int, float) tuples.")

            # Are user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(error_prefix + f"One (or more) special values are outside of parameter '{param}' value domain.")
            # Are user-provided special values unique?
            if len(set(v for v, _ in tuple_list)) != len(tuple_list):
                raise ValueError(error_prefix + f"One (or more) special values are defined more than once for parameter '{param}'.")
            # Are biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(error_prefix + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                                 "i.e., fall outside (0, 1) range.")

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.:
                raise ValueError(error_prefix + f"Total special values percentage for parameter '{param}' surpass 100%.")
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(f"Total special values percentage for parameter '{param}' exceeds 50%.", UserWarning)

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping: i.e., from high-dimensional space to the low-dimensional one.
        Reverse mapping is generated using the pseudo-inverse matrix, of original HeSBO projection matrix.
        This mapping can be potentially used to register configurations that were *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that it works as expected.

        Raises
        ------
            RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import pinv, LinAlgError  # pylint: disable=import-outside-toplevel

        warn("Trying to register a configuration that was not previously suggested by the optimizer. " +
             "This inverse configuration transformation is typically not supported. " +
             "However, we will try to register this configuration using an *experimental* method.", UserWarning)

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct dense projection matrix from sparse repr:
        # row `i` has a single non-zero entry (+1 or -1) at column `_h_matrix[i]`.
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        for row, col in enumerate(self._h_matrix):
            proj_matrix[row][col] = self._sigma_vector[row]

        # Compute pseudo-inverse matrix
        try:
            self._pinv_matrix = pinv(proj_matrix)
        except LinAlgError as err:
            raise RuntimeError(f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}") from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)