Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 98%
120 statements
coverage.py v7.6.9, created at 2024-12-20 00:44 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Contains the :py:class:`.BaseOptimizer` abstract class."""

import collections
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from typing import List, Optional, Tuple, Union

import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd

from mlos_core.data_classes import Observation, Observations, Suggestion
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import config_to_series


class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface:
    :py:meth:`~.BaseOptimizer.suggest` and
    :py:meth:`~.BaseOptimizer.register`.
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        *,
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
    ):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        optimization_targets : List[str]
            The names of the optimization targets to minimize.
            To maximize a target, use the negative of the target when registering scores.
        objective_weights : Optional[List[float]]
            Optional list of weights of optimization targets.
        space_adapter : Optional[BaseSpaceAdapter]
            The space adapter instance (if any) to employ for parameter space transformations.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        """The parameter space to optimize."""

        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = (
            parameter_space if space_adapter is None else space_adapter.target_parameter_space
        )
        """
        The parameter space actually used by the optimizer.

        (in case a :py:mod:`SpaceAdapter <mlos_core.spaces.adapters>` is used)
        """

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._optimization_targets = optimization_targets
        self._objective_weights = objective_weights
        if objective_weights is not None and len(objective_weights) != len(optimization_targets):
            raise ValueError("Number of weights must match the number of optimization targets")

        self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter
        self._observations: Observations = Observations()
        self._has_context: Optional[bool] = None
        self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = []
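
    # Illustrative sketch (not part of the original source): constructing a
    # hypothetical concrete subclass. ``MyOptimizer`` stands in for any class
    # that implements the abstract ``_register``, ``_suggest``, and
    # ``register_pending`` methods; the parameter names and the "latency"
    # target are made up for the example.
    #
    #   space = ConfigSpace.ConfigurationSpace({"x": (0.0, 1.0), "n": (1, 10)})
    #   opt = MyOptimizer(parameter_space=space, optimization_targets=["latency"])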

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> Optional[BaseSpaceAdapter]:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(
        self,
        observations: Union[Observation, Observations],
    ) -> None:
        """
        Register one or more observations at once.

        Parameters
        ----------
        observations : Union[Observation, Observations]
            The observation(s) to register.
        """
        if isinstance(observations, Observation):
            observations = Observations(observations=[observations])
        # Check input and transform the observations if a space adapter is present.
        observations = Observations(
            observations=[
                self._preprocess_observation(observation) for observation in observations
            ]
        )
        # Now bulk register all observations (details delegated to the underlying classes).
        self._register(observations)
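
    # Illustrative sketch: registering the outcome of a suggestion. The score
    # index must match ``optimization_targets``; the "latency" target and its
    # value are assumptions for the example.
    #
    #   suggestion = opt.suggest()
    #   opt.register(
    #       observations=Observation(
    #           config=suggestion.config,
    #           score=pd.Series({"latency": 42.0}),
    #           context=suggestion.context,
    #           metadata=suggestion.metadata,
    #       )
    #   )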

    def _preprocess_observation(self, observation: Observation) -> Observation:
        """
        Wrapper method that employs the space adapter (if any) and performs some
        input validation before registering the configs and scores.

        Parameters
        ----------
        observation : Observation
            The observation to register.

        Returns
        -------
        observation : Observation
            The (possibly transformed) observation to register.
        """
        # Do some input validation.
        assert observation.metadata is None or isinstance(observation.metadata, pd.Series)
        assert set(observation.score.index) == set(
            self._optimization_targets
        ), "Mismatched optimization targets."
        assert self._has_context is None or self._has_context ^ (
            observation.context is None
        ), "Context must always be added or never be added."
        assert len(observation.config) == len(
            self.parameter_space.values()
        ), "Mismatched configuration shape."

        self._has_context = observation.context is not None
        self._observations.append(observation)

        transformed_observation = deepcopy(observation)  # Needed to support named tuples
        if self._space_adapter:
            transformed_observation = Observation(
                config=self._space_adapter.inverse_transform(transformed_observation.config),
                score=transformed_observation.score,
                context=transformed_observation.context,
                metadata=transformed_observation.metadata,
            )
            assert len(transformed_observation.config) == len(
                self.optimizer_parameter_space.values()
            ), "Mismatched configuration shape after inverse transform."
        return transformed_observation
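
    # Sketch of the space-adapter convention this method relies on (assuming a
    # roughly invertible adapter): ``inverse_transform`` maps a config from the
    # original ``parameter_space`` into the optimizer's target space, and
    # ``transform`` maps it back.
    #
    #   optimizer_config = adapter.inverse_transform(original_config)
    #   roundtrip_config = adapter.transform(optimizer_config)  # ~ original_config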

    @abstractmethod
    def _register(
        self,
        observations: Observations,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        observations : Observations
            The set of observations to register.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    def suggest(
        self,
        *,
        context: Optional[pd.Series] = None,
        defaults: bool = False,
    ) -> Suggestion:
        """
        Wrapper method that employs the space adapter (if any) after suggesting a
        new configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.
        defaults : bool
            Whether to return the default config instead of an optimizer-guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        suggestion : Suggestion
            The suggested point to evaluate.
        """
        if defaults:
            configuration = config_to_series(self.parameter_space.get_default_configuration())
            if self.space_adapter is not None:
                configuration = self.space_adapter.inverse_transform(configuration)
            suggestion = Suggestion(config=configuration, context=context, metadata=None)
        else:
            suggestion = self._suggest(context=context)
            assert set(suggestion.config.index).issubset(set(self.optimizer_parameter_space)), (
                "Optimizer suggested a configuration that does "
                "not match the expected parameter space."
            )
        if self._space_adapter:
            suggestion = Suggestion(
                config=self._space_adapter.transform(suggestion.config),
                context=suggestion.context,
                metadata=suggestion.metadata,
            )
            assert set(suggestion.config.index).issubset(set(self.parameter_space)), (
                "Space adapter produced a configuration that does "
                "not match the expected parameter space."
            )
        return suggestion
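
    # Typical optimization loop (illustrative sketch): ``run_trial`` is a
    # hypothetical user-supplied evaluation function returning one score per
    # optimization target.
    #
    #   for _ in range(20):
    #       suggestion = opt.suggest()
    #       scores = run_trial(suggestion.config)  # e.g., {"latency": 42.0}
    #       opt.register(
    #           observations=Observation(
    #               config=suggestion.config,
    #               score=pd.Series(scores),
    #               context=suggestion.context,
    #               metadata=suggestion.metadata,
    #           )
    #       )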

    @abstractmethod
    def _suggest(
        self,
        *,
        context: Optional[pd.Series] = None,
    ) -> Suggestion:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.

        Returns
        -------
        suggestion : Suggestion
            The suggestion to evaluate.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

    @abstractmethod
    def register_pending(self, pending: Suggestion) -> None:
        """
        Registers the given suggestion as "pending". That is to say, it has been
        suggested by the optimizer, and an experiment trial has been started. This
        can be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        pending : Suggestion
            The pending suggestion to register.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover
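
    # Illustrative use with parallel trials (sketch): mark each suggestion as
    # pending before dispatching it so the optimizer can account for in-flight
    # trials. ``dispatch_to_worker`` is a hypothetical async executor.
    #
    #   suggestion = opt.suggest()
    #   opt.register_pending(pending=suggestion)
    #   dispatch_to_worker(suggestion)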

    def get_observations(self) -> Observations:
        """
        Returns all the observations (configs, scores, and contexts) registered so
        far as an :py:class:`.Observations` instance.

        Returns
        -------
        observations : Observations
            All the observations registered so far.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        return self._observations

    def get_best_observations(
        self,
        n_max: int = 1,
    ) -> Observations:
        """
        Get the N best observations so far as a filtered version of Observations.
        Default is N=1. The best observations are determined in ASCENDING order of
        the optimization targets (i.e., smaller is better), using the
        `pandas.DataFrame.nsmallest(..., keep="first")` method under the hood.

        Parameters
        ----------
        n_max : int
            Maximum number of best observations to return. Default is 1.

        Returns
        -------
        observations : Observations
            A filtered version of Observations with the best N observations.
        """
        observations = self.get_observations()
        if len(observations) == 0:
            raise ValueError("No observations registered yet.")
        idx = observations.scores.nsmallest(
            n_max,
            columns=self._optimization_targets,
            keep="first",
        ).index
        return observations.filter_by_index(idx)
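
    # Illustrative sketch: fetch the single best observation seen so far and
    # unpack its config and score rows via the frame accessors on
    # :py:class:`.Observations` (assumed here to be ``configs``/``scores``).
    #
    #   best = opt.get_best_observations(n_max=1)
    #   best_config = best.configs.iloc[0]
    #   best_score = best.scores.iloc[0]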

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use.

        Default is no-op. Redefine this method in optimizers that require cleanup.
        """

    def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
        """Convert numpy array from one-hot encoding to a DataFrame with categoricals
        and ints in proper columns.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    for offset, val in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    j += len(param.choices)
                else:
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)

    def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
        """Convert pandas DataFrame to one-hot-encoded numpy array."""
        n_cols = 0
        n_rows = config.shape[0] if config.ndim > 1 else 1
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot
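
    # One-hot layout sketch (illustrative): for an ``optimizer_parameter_space``
    # with a categorical ``c in {"a", "b"}`` and an integer ``n``, each encoded
    # row is ``[c == "a", c == "b", n]``. These are internal helpers, so the
    # example calls them directly only for exposition.
    #
    #   df = pd.DataFrame({"c": ["b"], "n": [3]})
    #   encoded = opt._to_1hot(config=df)         # array([[0., 1., 3.]], dtype=float32)
    #   decoded = opt._from_1hot(config=encoded)  # equals ``df`` again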