Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%
131 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-05 00:36 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Base class for an interface between the benchmarking framework
7and mlos_core optimizers.
8"""
10import logging
11from abc import ABCMeta, abstractmethod
12from distutils.util import strtobool # pylint: disable=deprecated-module
14from types import TracebackType
15from typing import Dict, Optional, Sequence, Tuple, Type, Union
16from typing_extensions import Literal
18from ConfigSpace import ConfigurationSpace
20from mlos_bench.config.schemas import ConfigSchema
21from mlos_bench.services.base_service import Service
22from mlos_bench.environments.status import Status
23from mlos_bench.tunables.tunable import TunableValue
24from mlos_bench.tunables.tunable_groups import TunableGroups
25from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
27_LOG = logging.getLogger(__name__)
class Optimizer(metaclass=ABCMeta):     # pylint: disable=too-many-instance-attributes
    """
    An abstract interface between the benchmarking framework and mlos_core optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }

    def __init__(self,
                 tunables: TunableGroups,
                 config: dict,
                 global_config: Optional[dict] = None,
                 service: Optional[Service] = None):
        """
        Create a new optimizer for the given configuration space defined by the tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : Optional[dict]
            Global configuration parameters (e.g., `experiment_id`) shared
            across the benchmarking components.
        service : Optional[Service]
            An optional service object used by some optimizer implementations
            for auxiliary functions (e.g., config loading).

        Raises
        ------
        ValueError
            If `optimization_targets` is not a dict or has an invalid direction.
        NotImplementedError
            If more than one optimization target is specified.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: Optional[ConfigurationSpace] = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get('experiment_id')
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop('start_with_defaults', True))))
        self._max_iter = int(self._config.pop('max_suggestions', 100))

        opt_targets: Dict[str, str] = self._config.pop('optimization_targets', {'score': 'min'})
        if not isinstance(opt_targets, dict):
            raise ValueError(f"optimization_targets should be a dict: {opt_targets}")
        # TODO: Implement multi-target optimization.
        if len(opt_targets) != 1:
            raise NotImplementedError("Multi-target optimization is not implemented.")
        (self._opt_target, opt_dir) = next(iter(opt_targets.items()))
        # Validate explicitly instead of letting a bare KeyError escape from a dict lookup.
        if opt_dir not in ("min", "max"):
            raise ValueError(f"Invalid optimization direction '{opt_dir}' "
                             + f"for target '{self._opt_target}'; expected 'min' or 'max'.")
        # Internally the score is always *minimized*: negate it when maximizing.
        self._opt_sign = 1 if opt_dir == "min" else -1

    def _validate_json_config(self, config: dict) -> None:
        """
        Reconstructs a basic json config that this class might have been
        instantiated from in order to validate configs provided outside the
        file loading mechanism.

        Raises
        ------
        jsonschema.exceptions.ValidationError
            If the reconstructed config does not match the optimizer schema.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        """
        Produce a human-readable summary: optimizer name, direction, target, and config.
        """
        opt_direction = 'min' if self.is_min else 'max'
        return f"{self.name}:{opt_direction}({self.target})(config={self._config})"

    def __enter__(self) -> 'Optimizer':
        """
        Enter the optimizer's context.
        """
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(self, ex_type: Optional[Type[BaseException]],
                 ex_val: Optional[BaseException],
                 ex_tb: Optional[TracebackType]) -> Literal[False]:
        """
        Exit the context of the optimizer.

        Always returns False so that exceptions are never suppressed.
        """
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (trials) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Launcher.trial_config_repeat_count.
        """
        return self._iter

    @property
    def max_iterations(self) -> int:
        """
        The maximum number of iterations (trials) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Launcher.trial_config_repeat_count.
        """
        return self._max_iter

    @property
    def seed(self) -> int:
        """
        The random seed for the optimizer.
        """
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.
        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        # Lazily converted on first access and cached for subsequent calls.
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer. We save this information in
        mlos_bench storage to track the source of each configuration.
        """
        return self.__class__.__name__

    # TODO: Expand these properties for multi-objective.

    @property
    def is_min(self) -> bool:
        """
        True if minimizing, False otherwise. Minimization is the default.
        """
        return self._opt_sign > 0

    @property
    def target(self) -> str:
        """
        The name of the target metric to optimize.
        """
        return self._opt_target

    @property
    def direction(self) -> Literal['min', 'max']:
        """
        The direction to optimize the target metric (e.g., min or max).
        """
        return 'min' if self.is_min else 'max'

    @property
    def supports_preload(self) -> bool:
        """
        Return True if the optimizer supports pre-loading the data from previous experiments.
        """
        return True

    @abstractmethod
    def bulk_register(self,
                      configs: Sequence[dict],
                      scores: Sequence[Optional[Dict[str, TunableValue]]],
                      status: Optional[Sequence[Status]] = None) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, false otherwise.

        Raises
        ------
        ValueError
            If the numbers of configs, scores, and status values do not match.
        """
        num_configs = len(configs or [])
        num_scores = len(scores or [])
        _LOG.info("Update the optimizer with: %d configs, %d scores, %d status values",
                  num_configs, num_scores, len(status or []))
        if num_configs != num_scores:
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and num_configs != len(status):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            # Prior data already covers the defaults; don't waste a suggestion on them.
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion.
        Base class' implementation increments the iteration count
        and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(self, tunables: TunableGroups, status: Status,
                 score: Optional[Union[float, Dict[str, float]]] = None) -> Optional[float]:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Union[float, Dict[str, float]]
            A scalar or a dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : float
            The scalar benchmark score extracted (and possibly transformed)
            from the raw results that's being minimized.

        Raises
        ------
        ValueError
            If the status and score are inconsistent (a successful run must
            have a score and an unsuccessful one must not).
        """
        _LOG.info("Iteration %d :: Register: %s = %s score: %s",
                  self._iter, tunables, status, score)
        # A score must be present if and only if the run succeeded.
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_score(status, score)

    def _get_score(self, status: Status,
                   score: Optional[Union[float, Dict[str, float]]]) -> Optional[float]:
        """
        Extract a scalar benchmark score from the raw results.
        Change the sign if we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Union[float, Dict[str, float]]
            A scalar or a dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : float
            A scalar benchmark score to be used as a primary target for MINIMIZATION.
            None if the experiment has not completed yet.
        """
        if not status.is_completed():
            return None
        if status.is_succeeded():
            assert score is not None
            if isinstance(score, dict):
                if self._opt_target not in score:
                    raise ValueError(f"Missing expected optimization target metric '{self._opt_target}' "
                                     + f"in results for iteration {self._iter}: {score}")
                score = score[self._opt_target]
            return float(score) * self._opt_sign
        # Completed but unsuccessful (e.g., FAILED or TIMED_OUT):
        # penalize with the worst possible value for minimization.
        assert score is None
        return float("inf")

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.
        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_iter

    @abstractmethod
    def get_best_observation(self) -> Union[Tuple[float, TunableGroups], Tuple[None, None]]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : Tuple[float, TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """