Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 99%
115 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-06 00:35 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-06 00:35 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Contains the BaseOptimizer abstract class.
7"""
9import collections
10from abc import ABCMeta, abstractmethod
11from typing import List, Optional, Tuple, Union
13import ConfigSpace
14import numpy as np
15import numpy.typing as npt
16import pandas as pd
18from mlos_core.util import config_to_dataframe
19from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
class BaseOptimizer(metaclass=ABCMeta):
    """
    Optimizer abstract base class defining the basic interface.

    Concrete optimizers implement ``_register``, ``_suggest`` and ``register_pending``.
    The public ``register`` and ``suggest`` wrappers validate their inputs and apply the
    (optional) space adapter, so implementations only ever see configurations expressed
    in ``optimizer_parameter_space``.
    """

    def __init__(self, *,
                 parameter_space: "ConfigSpace.ConfigurationSpace",
                 space_adapter: Optional["BaseSpaceAdapter"] = None):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        space_adapter : BaseSpaceAdapter
            The space adapter class to employ for parameter space transformations.

        Raises
        ------
        ValueError
            If the space adapter was built for a different parameter space.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        # The optimizer works in the adapter's target space when an adapter is given.
        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = \
            parameter_space if space_adapter is None else space_adapter.target_parameter_space

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter
        # One (configurations, scores, context) tuple per call to register().
        self._observations: List[Tuple[pd.DataFrame, pd.Series, Optional[pd.DataFrame]]] = []
        # None until the first register() call; afterwards records whether context is in use.
        self._has_context: Optional[bool] = None
        self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = []

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> Optional["BaseSpaceAdapter"]:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(self, configurations: pd.DataFrame, scores: pd.Series,
                 context: Optional[pd.DataFrame] = None) -> None:
        """Wrapper method, which employs the space adapter (if any), before registering the configurations and scores.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.
        """
        # Do some input validation.
        # The XOR holds exactly when this call agrees with earlier calls on whether
        # context is supplied.
        assert self._has_context is None or self._has_context ^ (context is None), \
            "Context must always be added or never be added."
        assert len(configurations) == len(scores), \
            "Mismatched number of configurations and scores."
        if context is not None:
            assert len(configurations) == len(context), \
                "Mismatched number of configurations and context."
        assert configurations.shape[1] == len(self.parameter_space.values()), \
            "Mismatched configuration shape."
        # Observations are stored as given, i.e. before the adapter is applied.
        self._observations.append((configurations, scores, context))
        self._has_context = context is not None

        # Explicit None check, consistent with __init__ and suggest().
        if self._space_adapter is not None:
            configurations = self._space_adapter.inverse_transform(configurations)
            assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \
                "Mismatched configuration shape after inverse transform."
        return self._register(configurations, scores, context)

    @abstractmethod
    def _register(self, configurations: pd.DataFrame, scores: pd.Series,
                  context: Optional[pd.DataFrame] = None) -> None:
        """Registers the given configurations and scores.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.
        """
        pass    # pylint: disable=unnecessary-pass # pragma: no cover

    def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame:
        """
        Wrapper method, which employs the space adapter (if any), after suggesting a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.
        defaults : bool
            Whether or not to return the default config instead of an optimizer guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        """
        if defaults:
            configuration = config_to_dataframe(self.parameter_space.get_default_configuration())
            # Map the default through inverse_transform first so that the common
            # transform step below applies to both branches.
            if self.space_adapter is not None:
                configuration = self.space_adapter.inverse_transform(configuration)
        else:
            configuration = self._suggest(context)
            assert len(configuration) == 1, \
                "Suggest must return a single configuration."
            assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \
                "Optimizer suggested a configuration that does not match the expected parameter space."
        # Explicit None check, consistent with the defaults branch above.
        if self._space_adapter is not None:
            configuration = self._space_adapter.transform(configuration)
            assert set(configuration.columns).issubset(set(self.parameter_space)), \
                "Space adapter produced a configuration that does not match the expected parameter space."
        return configuration

    @abstractmethod
    def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Suggests a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        """
        pass    # pylint: disable=unnecessary-pass # pragma: no cover

    @abstractmethod
    def register_pending(self, configurations: pd.DataFrame,
                         context: Optional[pd.DataFrame] = None) -> None:
        """Registers the given configurations as "pending".
        That is to say, it has been suggested by the optimizer, and an experiment trial has been started.
        This can be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
        context : pd.DataFrame
            Not Yet Implemented.
        """
        pass    # pylint: disable=unnecessary-pass # pragma: no cover

    def get_observations(self) -> pd.DataFrame:
        """Returns the observations as a dataframe.

        Returns
        -------
        observations : pd.DataFrame
            Dataframe of observations. The columns are parameter names and "score" for the score, each row is an observation.

        Raises
        ------
        ValueError
            If nothing has been registered yet.
        NotImplementedError
            If any registered observation carries a context.
        """
        if not self._observations:
            raise ValueError("No observations registered yet.")
        configs = pd.concat([config for config, _, _ in self._observations])
        scores = pd.concat([score for _, score, _ in self._observations])
        # Attach scores by position rather than by index label: after concatenation the
        # labels of separately registered frames can repeat or differ between configs
        # and scores, and label alignment would then pair rows with the wrong score
        # (or fail on duplicate labels). register() guarantees matching lengths.
        configs["score"] = scores.to_numpy()
        if any(context is not None for _, _, context in self._observations):
            # configs = pd.concat([configs, contexts], axis=1)
            # Not reachable for now
            raise NotImplementedError()
        return configs

    def get_best_observation(self) -> pd.DataFrame:
        """Returns the best observation so far as a dataframe.

        The best observation is the row with the smallest "score".

        Returns
        -------
        best_observation : pd.DataFrame
            Dataframe with a single row containing the best observation. The columns are parameter names and "score" for the score.

        Raises
        ------
        ValueError
            If nothing has been registered yet (raised by ``get_observations``).
        """
        observations = self.get_observations()
        return observations.nsmallest(1, columns='score')

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use. Default is no-op.
        Redefine this method in optimizers that require cleanup.
        """

    def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
        """
        Convert numpy array from one-hot encoding to a DataFrame
        with categoricals and ints in proper columns.

        Each row of ``config`` is read left to right in the order of
        ``self.optimizer_parameter_space.values()``: a categorical parameter occupies
        ``len(param.choices)`` consecutive slots, every other parameter occupies one.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            j = 0   # Offset of the current parameter's first slot within the row.
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    # The first slot equal to 1 selects the choice.
                    # NOTE: if no slot equals 1, nothing is appended for this row.
                    for (offset, val) in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    j += len(param.choices)
                else:
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)

    def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
        """
        Convert pandas DataFrame to one-hot-encoded numpy array.

        A Series is treated as a single configuration (one output row). The result is a
        float32 array with one row per configuration, using the same column layout that
        ``_from_1hot`` reads.
        """
        n_cols = 0
        n_rows = config.shape[0] if config.ndim > 1 else 1
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0   # Offset of the current parameter's first slot within the row.
            for param in self.optimizer_parameter_space.values():
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    # get_loc returns a non-int for duplicated labels; require a unique column.
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot