Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 94%
126 statements
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base class for an interface between the benchmarking framework and mlos_core
optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from types import TracebackType
from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.util import strtobool

_LOG = logging.getLogger(__name__)


class Optimizer(metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and mlos_core
    optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }
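
    # A minimal sketch of an optimizer "config" dict using only the base-class
    # properties above (the concrete values here are hypothetical, not defaults
    # mandated by the schema):
    #
    #   config = {
    #       "optimization_targets": {"score": "min"},
    #       "max_suggestions": 100,
    #       "seed": 42,
    #       "start_with_defaults": True,
    #   }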

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : Optional[dict]
            Free-format dict of global parameters (e.g., `experiment_id`) shared with the optimizer.
        service : Optional[Service]
            An optional service object to be used by the optimizer.
        """
60 _LOG.info("Create optimizer for: %s", tunables)
61 _LOG.debug("Optimizer config: %s", config)
62 self._validate_json_config(config)
63 self._config = config.copy()
64 self._global_config = global_config or {}
65 self._tunables = tunables
66 self._config_space: Optional[ConfigurationSpace] = None
67 self._service = service
68 self._seed = int(config.get("seed", 42))
69 self._in_context = False
71 experiment_id = self._global_config.get("experiment_id")
72 self.experiment_id = str(experiment_id).strip() if experiment_id else None
74 self._iter = 0
75 # If False, use the optimizer to suggest the initial configuration;
76 # if True (default), use the already initialized values for the first iteration.
77 self._start_with_defaults: bool = bool(
78 strtobool(str(self._config.pop("start_with_defaults", True)))
79 )
80 self._max_suggestions = int(self._config.pop("max_suggestions", 100))
82 opt_targets: Dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
83 self._opt_targets: Dict[str, Literal[1, -1]] = {}
84 for opt_target, opt_dir in opt_targets.items():
85 if opt_dir == "min":
86 self._opt_targets[opt_target] = 1
87 elif opt_dir == "max":
88 self._opt_targets[opt_target] = -1
89 else:
90 raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")
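
        # Illustrative example (hypothetical values): a config of
        #   {"optimization_targets": {"latency": "min", "throughput": "max"}}
        # yields
        #   self._opt_targets == {"latency": 1, "throughput": -1}
        # so that multiplying each score by its direction produces a value to minimize.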

    def _validate_json_config(self, config: dict) -> None:
        """Reconstruct a basic JSON config that this class might have been instantiated
        from, so that configs provided outside the file loading mechanism can still be
        validated against the schema.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"{opt_target}:{({1: 'min', -1: 'max'}[opt_dir])}"
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions
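
    # Usage sketch for the context-manager protocol above (assumes a concrete
    # subclass, e.g. MockOptimizer; the abstract base class cannot be instantiated
    # directly):
    #
    #   with MockOptimizer(tunables, config={"seed": 42}) as opt:
    #       ...  # suggest/register trials; __exit__ logs the outcome and resets the flag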

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> Dict[str, Literal["min", "max"]]:
        """Returns a dictionary of optimization targets and their direction."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data
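
    # Usage sketch (hypothetical data; concrete subclasses do the actual pre-loading
    # on top of this base-class bookkeeping):
    #
    #   configs = [{"vm_size": "Standard_B2s"}, {"vm_size": "Standard_D2s_v5"}]
    #   scores = [{"score": 88.0}, {"score": 73.5}]
    #   status = [Status.SUCCEEDED, Status.SUCCEEDED]
    #   if opt.bulk_register(configs, scores, status):
    #       ...  # prior data was loaded; defaults will not be suggested first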

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. The base class implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : Optional[Dict[str, float]]
            Benchmark scores extracted (and possibly transformed) from the results,
            sign-adjusted so that they are always MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_scores(status, score)
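
    # Consistency rule illustrated (hypothetical values): a SUCCEEDED status must come
    # with a score dict and a failed status with score=None, otherwise the XOR check
    # above raises ValueError.
    #
    #   opt.register(tunables, Status.SUCCEEDED, {"score": 88.0})  # ok
    #   opt.register(tunables, Status.FAILED, None)                # ok (maps to +inf below)
    #   opt.register(tunables, Status.SUCCEEDED, None)             # raises ValueError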

    def _get_scores(
        self,
        status: Status,
        scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]],
    ) -> Optional[Dict[str, float]]:
        """
        Extract scalar benchmark scores from the results dict and flip the sign of
        each score that we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : Optional[Dict[str, float]]
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: Dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics
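
    # Sign convention illustrated (hypothetical numbers): with
    #   self._opt_targets == {"latency": 1, "throughput": -1}
    # and scores == {"latency": 12.5, "throughput": 940},
    # this method returns {"latency": 12.5, "throughput": -940.0},
    # i.e., every returned value is something to be minimized.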

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : Tuple[Dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """