Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 99% (119 statements)
coverage.py v7.6.7, created at 2024-11-22 01:18 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Contains the :py:class:`.BaseOptimizer` abstract class."""

import collections
from abc import ABCMeta, abstractmethod
from typing import List, Optional, Tuple, Union

import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import config_to_dataframe

class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface:
    :py:meth:`~.BaseOptimizer.suggest`,
    :py:meth:`~.BaseOptimizer.register`.
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        *,
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
    ):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        optimization_targets : List[str]
            The names of the optimization targets to minimize.
            To maximize a target, use the negative of the target when registering scores.
        objective_weights : Optional[List[float]]
            Optional list of weights of optimization targets.
        space_adapter : BaseSpaceAdapter
            The space adapter instance to employ for parameter space transformations.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        """The parameter space to optimize."""

        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = (
            parameter_space if space_adapter is None else space_adapter.target_parameter_space
        )
        """
        The parameter space actually used by the optimizer.

        (in case a :py:mod:`SpaceAdapter <mlos_core.spaces.adapters>` is used)
        """

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._optimization_targets = optimization_targets
        self._objective_weights = objective_weights
        if objective_weights is not None and len(objective_weights) != len(optimization_targets):
            raise ValueError("Number of weights must match the number of optimization targets")

        self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter
        self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]] = []
        self._has_context: Optional[bool] = None
        self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = []

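    # Illustrative sketch (not part of the original source): a concrete subclass is
    # typically constructed with a ConfigSpace.ConfigurationSpace and the names of the
    # targets to minimize, e.g. (hypothetical optimizer name):
    #
    #     space = ConfigSpace.ConfigurationSpace({"x": (0, 10)})
    #     opt = SomeConcreteOptimizer(parameter_space=space, optimization_targets=["score"])
    #
    # Passing objective_weights of a mismatched length, or a space_adapter built for a
    # different parameter_space, raises ValueError as validated above.
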
    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> Optional[BaseSpaceAdapter]:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Wrapper method, which employs the space adapter (if any), before registering the
        configs and scores.

        Parameters
        ----------
        configs : pandas.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        scores : pandas.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.
        context : pandas.DataFrame
            Not Yet Implemented.
        metadata : Optional[pandas.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        # Do some input validation.
        assert metadata is None or isinstance(metadata, pd.DataFrame)
        assert set(scores.columns) == set(
            self._optimization_targets
        ), "Mismatched optimization targets."
        assert self._has_context is None or self._has_context ^ (
            context is None
        ), "Context must always be added or never be added."
        assert len(configs) == len(scores), "Mismatched number of configs and scores."
        if context is not None:
            assert len(configs) == len(context), "Mismatched number of configs and context."
        assert configs.shape[1] == len(
            self.parameter_space.values()
        ), "Mismatched configuration shape."
        self._observations.append((configs, scores, context))
        self._has_context = context is not None

        if self._space_adapter:
            configs = self._space_adapter.inverse_transform(configs)
            assert configs.shape[1] == len(
                self.optimizer_parameter_space.values()
            ), "Mismatched configuration shape after inverse transform."
        return self._register(configs=configs, scores=scores, context=context)

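    # Illustrative sketch (not part of the original source), assuming a single optimization
    # target named "score" and a parameter space with parameters "x" and "y", registered on a
    # hypothetical concrete optimizer instance `opt`:
    #
    #     configs = pd.DataFrame({"x": [1], "y": [0.5]})
    #     scores = pd.DataFrame({"score": [0.73]})
    #     opt.register(configs=configs, scores=scores)
    #
    # The columns of `scores` must exactly match optimization_targets, and `configs` must have
    # one column per parameter in the (original) parameter_space.
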
    @abstractmethod
    def _register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        configs : pandas.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        scores : pandas.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.
        context : pandas.DataFrame
            Not Yet Implemented.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    def suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
        defaults: bool = False,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Wrapper method, which employs the space adapter (if any), after suggesting a new
        configuration.

        Parameters
        ----------
        context : pandas.DataFrame
            Not Yet Implemented.
        defaults : bool
            Whether or not to return the default config instead of an optimizer-guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        configuration : pandas.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        metadata : Optional[pandas.DataFrame]
            The metadata associated with the given configuration used for evaluations.
            Backend optimizer specific.
        """
        if defaults:
            configuration = config_to_dataframe(self.parameter_space.get_default_configuration())
            metadata = None
            if self.space_adapter is not None:
                configuration = self.space_adapter.inverse_transform(configuration)
        else:
            configuration, metadata = self._suggest(context=context)
            assert len(configuration) == 1, "Suggest must return a single configuration."
            assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), (
                "Optimizer suggested a configuration that does "
                "not match the expected parameter space."
            )
        if self._space_adapter:
            configuration = self._space_adapter.transform(configuration)
            assert set(configuration.columns).issubset(set(self.parameter_space)), (
                "Space adapter produced a configuration that does "
                "not match the expected parameter space."
            )
        return configuration, metadata

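    # Illustrative sketch (not part of the original source): a suggest/evaluate/register
    # round trip with a hypothetical concrete optimizer instance `opt` and a user-defined,
    # hypothetical evaluate() function:
    #
    #     suggestion, metadata = opt.suggest()    # single-row DataFrame + optional metadata
    #     score = evaluate(suggestion)            # one-row DataFrame of optimization targets
    #     opt.register(configs=suggestion, scores=score, metadata=metadata)
    #
    # With defaults=True the space's default configuration is returned and metadata is None.
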
    @abstractmethod
    def _suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : pandas.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pandas.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        metadata : Optional[pandas.DataFrame]
            The metadata associated with the given configuration used for evaluations.
            Backend optimizer specific.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    @abstractmethod
    def register_pending(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs as "pending". That is to say, they have been suggested
        by the optimizer, and an experiment trial has been started. This can be useful
        for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        configs : pandas.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        context : pandas.DataFrame
            Not Yet Implemented.
        metadata : Optional[pandas.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Returns the observations as a triplet of DataFrames (config, score, context).

        Returns
        -------
        observations : Tuple[pandas.DataFrame, pandas.DataFrame, Optional[pandas.DataFrame]]
            A triplet of (config, score, context) DataFrames of observations.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        configs = pd.concat([config for config, _, _ in self._observations]).reset_index(drop=True)
        scores = pd.concat([score for _, score, _ in self._observations]).reset_index(drop=True)
        contexts = pd.concat(
            [
                pd.DataFrame() if context is None else context
                for _, _, context in self._observations
            ]
        ).reset_index(drop=True)
        return (configs, scores, contexts if len(contexts.columns) > 0 else None)

    def get_best_observations(
        self,
        *,
        n_max: int = 1,
    ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Get the N best observations so far as a triplet of DataFrames (config, score,
        context). Default is N=1. The rows are ordered in ASCENDING order of the
        optimization targets. The function uses the `pandas.DataFrame.nsmallest(...,
        keep="first")` method under the hood.

        Parameters
        ----------
        n_max : int
            Maximum number of best observations to return. Default is 1.

        Returns
        -------
        observations : Tuple[pandas.DataFrame, pandas.DataFrame, Optional[pandas.DataFrame]]
            A triplet of (config, score, context) DataFrames of the best observations.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        (configs, scores, contexts) = self.get_observations()
        idx = scores.nsmallest(n_max, columns=self._optimization_targets, keep="first").index
        return (configs.loc[idx], scores.loc[idx], None if contexts is None else contexts.loc[idx])

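    # Illustrative note (not part of the original source): with, say,
    # optimization_targets=["latency", "cost"], `scores.nsmallest(n_max, columns=[...])`
    # returns the n_max rows with the smallest "latency", using "cost" only to break ties,
    # so "best" means the smallest target values in the order the targets were declared.
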
    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use.

        Default is no-op. Redefine this method in optimizers that require cleanup.
        """

    def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame:
        """Convert numpy array from one-hot encoding to a DataFrame with categoricals
        and ints in proper columns.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    for offset, val in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    j += len(param.choices)
                else:
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)

    def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
        """Convert pandas DataFrame to one-hot-encoded numpy array."""
        n_cols = 0
        n_rows = config.shape[0] if config.ndim > 1 else 1
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot

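
# -----------------------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module): a minimal concrete subclass
# wiring up the abstract methods so the suggest/register/get_best_observations flow above
# can be exercised end to end. Names prefixed with "_Demo"/"demo_" are hypothetical, and the
# ConfigSpace dict-constructor shorthand is assumed to be available in the installed
# ConfigSpace version.
if __name__ == "__main__":

    class _DemoRandomOptimizer(BaseOptimizer):
        """Toy optimizer that samples configurations at random; for illustration only."""

        def _suggest(
            self,
            *,
            context: Optional[pd.DataFrame] = None,
        ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
            # Sample a single random configuration from the (possibly adapted) space.
            config = self.optimizer_parameter_space.sample_configuration()
            return config_to_dataframe(config), None

        def _register(
            self,
            *,
            configs: pd.DataFrame,
            scores: pd.DataFrame,
            context: Optional[pd.DataFrame] = None,
            metadata: Optional[pd.DataFrame] = None,
        ) -> None:
            # Nothing to do: the base class wrapper already records observations.
            pass

        def register_pending(
            self,
            *,
            configs: pd.DataFrame,
            context: Optional[pd.DataFrame] = None,
            metadata: Optional[pd.DataFrame] = None,
        ) -> None:
            # Not needed for this demonstration.
            pass

    demo_space = ConfigSpace.ConfigurationSpace({"x": (0, 10)})
    demo_opt = _DemoRandomOptimizer(
        parameter_space=demo_space,
        optimization_targets=["score"],
    )
    for _ in range(5):
        suggestion, demo_metadata = demo_opt.suggest()
        demo_score = pd.DataFrame({"score": [float(suggestion["x"].iloc[0]) ** 2]})
        demo_opt.register(configs=suggestion, scores=demo_score, metadata=demo_metadata)
    print(demo_opt.get_best_observations())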