#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
A wrapper for :py:mod:`mlos_core.optimizers` for :py:mod:`mlos_bench`.

Config
------
:py:mod:`mlos_bench.optimizers` has an overview of the configuration options for
the :py:class:`.MlosCoreOptimizer`.

See Also
--------
:py:mod:`mlos_bench.optimizers` :
    Another working example of an :py:class:`.MlosCoreOptimizer`.
:py:mod:`mlos_core.optimizers` :
    Documentation on the underlying mlos_core Optimizers.
:py:mod:`mlos_core.spaces.adapters` :
    Documentation on the underlying mlos_core SpaceAdapters.

Examples
--------
Load tunables from a JSON string.
Note: normally these would be automatically loaded from the
:py:class:`~mlos_bench.environments.base_environment.Environment`'s
``include_tunables`` config parameter.

>>> import json5 as json
>>> import mlos_core.optimizers
>>> from mlos_bench.environments.status import Status
>>> from mlos_bench.services.config_persistence import ConfigPersistenceService
>>> service = ConfigPersistenceService()
>>> json_config = '''
... {
...   "group_1": {
...     "cost": 1,
...     "params": {
...       "flags": {
...         "type": "categorical",
...         "values": ["on", "off", "auto"],
...         "default": "auto",
...       },
...       "int_param": {
...         "type": "int",
...         "range": [1, 100],
...         "default": 10,
...       },
...       "float_param": {
...         "type": "float",
...         "range": [0, 100],
...         "default": 50.0,
...       }
...     }
...   }
... }
... '''
>>> tunables = service.load_tunables(jsons=[json_config])
>>> # Here are the defaults:
>>> tunables.get_param_values()
{'flags': 'auto', 'int_param': 10, 'float_param': 50.0}
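
A :py:class:`~mlos_bench.tunables.tunable_groups.TunableGroups` instance can also
be copied and assigned new values directly (a small illustration using the same
``copy()``/``assign()`` methods this module relies on internally; the exact dict
ordering is assumed to match the defaults above):

>>> tunables.copy().assign({"int_param": 42}).get_param_values()
{'flags': 'auto', 'int_param': 42, 'float_param': 50.0}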

When using the :py:class:`.MlosCoreOptimizer`, we can also specify some
additional properties, for instance the ``optimizer_type``, which is one of the
mlos_core :py:data:`~mlos_core.optimizers.OptimizerType` enum values:

>>> import mlos_core.optimizers
>>> print([member.name for member in mlos_core.optimizers.OptimizerType])
['RANDOM', 'FLAML', 'SMAC']
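
If ``optimizer_type`` is omitted from the ``config`` section, the wrapper falls
back to the mlos_core default (a minimal check; the concrete default member
depends on the installed ``mlos_core`` version):

>>> from mlos_core.optimizers import DEFAULT_OPTIMIZER_TYPE
>>> isinstance(DEFAULT_OPTIMIZER_TYPE, mlos_core.optimizers.OptimizerType)
True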

These may also include their own configuration options, which can be specified
as additional key-value pairs in the ``config`` section, where each key-value
pair corresponds to an argument to the respective OptimizerType's constructor.
See :py:meth:`mlos_core.optimizers.OptimizerFactory.create` for more details.

Other Optimizers may also have their own configuration options.
See each class' documentation for details.

When using :py:class:`.MlosCoreOptimizer`, we can also specify an optional
``space_adapter_type``, which can sometimes help manipulate the configuration
space to something more manageable. It should be one of the following
:py:data:`~mlos_core.spaces.adapters.SpaceAdapterType` enum values:

>>> import mlos_core.spaces.adapters
>>> print([member.name for member in mlos_core.spaces.adapters.SpaceAdapterType])
['IDENTITY', 'LLAMATUNE']

These may also include their own configuration options, which can be specified
as additional key-value pairs in the optional ``space_adapter_config`` section,
where each key-value pair corresponds to an argument to the respective
SpaceAdapterType's constructor. See
:py:meth:`mlos_core.spaces.adapters.SpaceAdapterFactory.create` for more details.

Here's an example JSON config for an :py:class:`.MlosCoreOptimizer`.

>>> optimizer_json_config = '''
... {
...   "class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
...   "description": "MlosCoreOptimizer",
...   "config": {
...     "max_suggestions": 1000,
...     "optimization_targets": {
...       "throughput": "max",
...       "cost": "min",
...     },
...     "start_with_defaults": true,
...     "seed": 42,
...     // Override the default optimizer type
...     // Must be one of the mlos_core OptimizerType enum values.
...     "optimizer_type": "SMAC",
...     // Optionally provide some additional configuration options for the optimizer.
...     // Note: these are optimizer-specific and may not be supported by all optimizers.
...     "n_random_init": 25,
...     "n_random_probability": 0.01,
...     // Optionally override the default space adapter type
...     // Must be one of the mlos_core SpaceAdapterType enum values.
...     // LlamaTune is a method for automatically doing space reduction
...     // from the original space.
...     "space_adapter_type": "LLAMATUNE",
...     "space_adapter_config": {
...       // Note: these values are probably too low,
...       // but it's just for demonstration.
...       "num_low_dims": 2,
...       "max_unique_values_per_param": 10,
...     }
...   }
... }
... '''

That config will typically be loaded via the ``--optimizer`` command-line
argument to the :py:mod:`mlos_bench <mlos_bench.run>` CLI.
However, for demonstration purposes, we can load it directly here:

>>> config = json.loads(optimizer_json_config)
>>> optimizer = service.build_optimizer(
...     tunables=tunables,
...     service=service,
...     config=config,
... )
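
The result is an :py:class:`.MlosCoreOptimizer` wrapping the requested mlos_core
optimizer, and its ``name`` property reflects both (a quick sanity check; the
exact suffix depends on the underlying optimizer class):

>>> optimizer.name.startswith("MlosCoreOptimizer:")
True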

Internally the Scheduler will call the Optimizer's methods to suggest
configurations, like so:

>>> suggested_config_1 = optimizer.suggest()
>>> # Normally default values should be suggested first, per json config.
>>> # However, since LlamaTune is being employed here, the first suggestion may
>>> # be projected to a slightly different space.
>>> suggested_config_1.get_param_values()
{'flags': 'auto', 'int_param': 1, 'float_param': 55.5555555555556}
>>> # Get another suggestion.
>>> # Note that multiple suggestions can be pending prior to
>>> # registering their scores, supporting parallel trial execution.
>>> suggested_config_2 = optimizer.suggest()
>>> suggested_config_2.get_param_values()
{'flags': 'on', 'int_param': 78, 'float_param': 88.8888888888889}
>>> # Register some scores.
>>> # Note: Maximization problems track negative scores to produce a minimization problem.
>>> optimizer.register(suggested_config_1, Status.SUCCEEDED, {"throughput": 42, "cost": 19})
{'throughput': -42.0, 'cost': 19.0}
>>> optimizer.register(suggested_config_2, Status.SUCCEEDED, {"throughput": 7, "cost": 17.2})
{'throughput': -7.0, 'cost': 17.2}
>>> (best_score, best_config) = optimizer.get_best_observation()
>>> best_score
{'throughput': 42.0, 'cost': 19.0}
>>> assert best_config == suggested_config_1
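
:py:class:`.MlosCoreOptimizer` also supports use as a context manager; on exit it
calls ``cleanup()`` on the underlying mlos_core optimizer (a minimal sketch; in a
real run the Scheduler manages this lifecycle):

>>> optimizer.__exit__(None, None, None)
False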
166"""

import logging
import os
from collections.abc import Sequence
from types import TracebackType
from typing import Literal

import pandas as pd

from mlos_bench.environments.status import Status
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.optimizers.convert_configspace import (
    TunableValueKind,
    configspace_data_to_tunable_values,
    special_param_names,
)
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.tunables.tunable_types import TunableValue
from mlos_core.data_classes import Observations
from mlos_core.optimizers import (
    DEFAULT_OPTIMIZER_TYPE,
    BaseOptimizer,
    OptimizerFactory,
    OptimizerType,
    SpaceAdapterType,
)

_LOG = logging.getLogger(__name__)


class MlosCoreOptimizer(Optimizer):
    """A wrapper class for the :py:mod:`mlos_core.optimizers`."""

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: dict | None = None,
        service: Service | None = None,
    ):
        super().__init__(tunables, config, global_config, service)

        opt_type = getattr(
            OptimizerType, self._config.pop("optimizer_type", DEFAULT_OPTIMIZER_TYPE.name)
        )

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get("output_directory")
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config["output_directory"] = os.path.abspath(output_directory)
            else:
                _LOG.warning(
                    "SMAC optimizer output_directory was null. "
                    "SMAC will use a temporary directory."
                )

            # Make sure max_trials >= max_suggestions.
            if "max_trials" not in self._config:
                self._config["max_trials"] = self._max_suggestions
            assert int(self._config["max_trials"]) >= self._max_suggestions, (
                f"""max_trials {self._config.get("max_trials")} """
                f"< max_suggestions {self._max_suggestions}"
            )

        if "run_name" not in self._config and self.experiment_id:
            self._config["run_name"] = self.experiment_id

        space_adapter_type = self._config.pop("space_adapter_type", None)
        space_adapter_config = self._config.pop("space_adapter_config", {})

        if space_adapter_type is not None:
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)
            assert isinstance(space_adapter_type, SpaceAdapterType)
            if space_adapter_type == SpaceAdapterType.LLAMATUNE:
                # This is probably a sane default, especially when
                # bulk_registering old configs (e.g., Experiment resume), but is
                # not currently exposed in the config schema.
                space_adapter_config["use_approximate_reverse_mapping"] = True

        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimization_targets=list(self._opt_targets),
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )

    def __exit__(
        self,
        ex_type: type[BaseException] | None,
        ex_val: BaseException | None,
        ex_tb: TracebackType | None,
    ) -> Literal[False]:
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"

    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[dict[str, TunableValue] | None],
        status: Sequence[Status] | None = None,
    ) -> bool:

        if not super().bulk_register(configs, scores, status):
            return False

        df_configs = self._to_df(configs)  # Impute missing values, if necessary

        df_scores = self._adjust_signs_df(
            pd.DataFrame([{} if score is None else score for score in scores])
        )

        if status is not None:
            # Select only the completed trials, set scores for failed trials to +inf.
            df_status = pd.Series(status)
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]

        # TODO: Specify (in the config) which metrics to pass to the optimizer.
        # Issue: https://github.com/microsoft/MLOS/issues/745
        self._opt.register(observations=Observations(configs=df_configs, scores=df_scores))

        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up END: %s :: %s", self, score)

        return True

    def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
        """Coerce optimization target scores to floats and adjust the signs for a
        MINIMIZATION problem.
        """
        df_targets = df_scores[list(self._opt_targets)]
        try:
            # The values of self._opt_targets are sign multipliers:
            # +1 for "min" targets and -1 for "max" targets.
            return df_targets.astype(float) * self._opt_targets.values()
        except ValueError as ex:
            _LOG.error(
                "Some score values cannot be converted to float - check the data ::\n%s",
                df_targets,
                exc_info=True,
            )
            raise ValueError("Some score values cannot be converted to float") from ex

    def _to_df(self, configs: Sequence[dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and impute
        default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
        """
        df_configs = pd.DataFrame(configs)
        tunables_names = list(self._tunables.get_param_values().keys())
        missing_cols = set(tunables_names).difference(df_configs.columns)
        for tunable, _group in self._tunables:
            if tunable.name in missing_cols:
                df_configs[tunable.name] = tunable.default
            else:
                df_configs.fillna({tunable.name: tunable.default}, inplace=True)
            # External data can have incorrect types (e.g., all strings).
            df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
            # Add columns for tunables with special values.
            if tunable.special:
                (special_name, type_name) = special_param_names(tunable.name)
                tunables_names += [special_name, type_name]
                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
                df_configs[type_name] = TunableValueKind.RANGE.value
                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL.value
                if tunable.type == "int":
                    # Make int column NULLABLE:
                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
                df_configs[special_name] = df_configs[tunable.name]
                df_configs.loc[~is_special, special_name] = None
                df_configs.loc[is_special, tunable.name] = None
        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs

    def suggest(self) -> TunableGroups:
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        suggestion = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, suggestion.config)
        return tunables.assign(configspace_data_to_tunable_values(suggestion.config.to_dict()))

    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: dict[str, TunableValue] | None = None,
    ) -> dict[str, float] | None:
        registered_score = super().register(
            tunables,
            status,
            score,
        )  # Sign-adjusted for MINIMIZATION
        if status.is_completed():
            assert registered_score is not None
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
            # TODO: Specify (in the config) which metrics to pass to the optimizer.
            # Issue: https://github.com/microsoft/MLOS/issues/745
            self._opt.register(
                observations=Observations(
                    configs=df_config,
                    scores=pd.DataFrame([registered_score], dtype=float),
                )
            )
        return registered_score

    def get_best_observation(
        self,
    ) -> tuple[dict[str, float], TunableGroups] | tuple[None, None]:
        best_observations = self._opt.get_best_observations()
        if len(best_observations) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(best_observations.configs.iloc[0].to_dict())
        scores = self._adjust_signs_df(best_observations.scores).iloc[0].to_dict()
        _LOG.debug("Best observation: %s score: %s", params, scores)
        return (scores, self._tunables.copy().assign(params))