Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98%
108 statements
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""A wrapper for mlos_core optimizers for mlos_bench."""
7import logging
8import os
9from types import TracebackType
10from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union
12import pandas as pd
14from mlos_bench.environments.status import Status
15from mlos_bench.optimizers.base_optimizer import Optimizer
16from mlos_bench.optimizers.convert_configspace import (
17 TunableValueKind,
18 configspace_data_to_tunable_values,
19 special_param_names,
20)
21from mlos_bench.services.base_service import Service
22from mlos_bench.tunables.tunable import TunableValue
23from mlos_bench.tunables.tunable_groups import TunableGroups
24from mlos_core.optimizers import (
25 DEFAULT_OPTIMIZER_TYPE,
26 BaseOptimizer,
27 OptimizerFactory,
28 OptimizerType,
29 SpaceAdapterType,
30)
32_LOG = logging.getLogger(__name__)
class MlosCoreOptimizer(Optimizer):
    """A wrapper class for the mlos_core optimizers."""

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        """
        Create a new mlos_core optimizer wrapper.

        Parameters
        ----------
        tunables : TunableGroups
            The tunable parameters this optimizer will search over.
        config : dict
            Optimizer configuration. Recognized keys (all popped before the
            remainder is forwarded to the mlos_core optimizer as kwargs):
            ``optimizer_type``, ``space_adapter_type``, ``space_adapter_config``,
            and, for SMAC, ``output_directory``, ``max_trials``, ``run_name``.
        global_config : Optional[dict]
            Global configuration parameters (handled by the base class).
        service : Optional[Service]
            Optional service object (handled by the base class).
        """
        super().__init__(tunables, config, global_config, service)

        # Resolve the "optimizer_type" config string to an OptimizerType enum member.
        opt_type = getattr(
            OptimizerType, self._config.pop("optimizer_type", DEFAULT_OPTIMIZER_TYPE.name)
        )

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get("output_directory")
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config["output_directory"] = os.path.abspath(output_directory)
            else:
                _LOG.warning(
                    "SMAC optimizer output_directory was null. "
                    "SMAC will use a temporary directory."
                )

            # Make sure max_trials >= max_suggestions.
            if "max_trials" not in self._config:
                self._config["max_trials"] = self._max_suggestions
            # The failure message describes the violated condition: a failure
            # here means max_trials fell *below* max_suggestions.
            assert int(self._config["max_trials"]) >= self._max_suggestions, (
                f"max_trials {self._config.get('max_trials')} "
                f"< max_suggestions {self._max_suggestions}"
            )

            if "run_name" not in self._config and self.experiment_id:
                self._config["run_name"] = self.experiment_id

        space_adapter_type = self._config.pop("space_adapter_type", None)
        space_adapter_config = self._config.pop("space_adapter_config", {})

        if space_adapter_type is not None:
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)

        # Everything still left in self._config is forwarded verbatim as
        # optimizer-specific keyword arguments.
        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimization_targets=list(self._opt_targets),
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        # Let the underlying mlos_core optimizer release its resources before
        # delegating to the base class context-manager exit.
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        """Name of this optimizer, including the wrapped mlos_core optimizer class."""
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"

    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:
        """
        Pre-load (warm-up) the optimizer with data from past trials.

        Parameters
        ----------
        configs : Sequence[dict]
            Tunable values of the past trials.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results of the corresponding trials (None for trials
            without scores, e.g., failed ones).
        status : Optional[Sequence[Status]]
            Status of the corresponding trials, if known.

        Returns
        -------
        bool
            True if the data was registered; False if the base class rejected it.
        """
        if not super().bulk_register(configs, scores, status):
            return False

        df_configs = self._to_df(configs)  # Impute missing values, if necessary

        df_scores = self._adjust_signs_df(
            pd.DataFrame([{} if score is None else score for score in scores])
        )

        if status is not None:
            # Select only the completed trials, set scores for failed trials to +inf.
            df_status = pd.Series(status)
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]

        # TODO: Specify (in the config) which metrics to pass to the optimizer.
        # Issue: https://github.com/microsoft/MLOS/issues/745
        self._opt.register(configs=df_configs, scores=df_scores)

        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up END: %s :: %s", self, score)

        return True

    def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
        """Coerce optimization target scores to floats and adjust the signs for
        MINIMIZATION problem.
        """
        # Keep only the columns for the configured optimization targets.
        df_targets = df_scores[list(self._opt_targets)]
        try:
            # self._opt_targets maps target name -> sign multiplier
            # (sign-flip converts maximization targets into minimization form).
            return df_targets.astype(float) * self._opt_targets.values()
        except ValueError as ex:
            _LOG.error(
                "Some score values cannot be converted to float - check the data ::\n%s",
                df_targets,
                exc_info=True,
            )
            raise ValueError("Some score values cannot be converted to float") from ex

    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and impute
        default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
        """
        df_configs = pd.DataFrame(configs)
        tunables_names = list(self._tunables.get_param_values().keys())
        missing_cols = set(tunables_names).difference(df_configs.columns)
        for tunable, _group in self._tunables:
            if tunable.name in missing_cols:
                df_configs[tunable.name] = tunable.default
            else:
                df_configs.fillna({tunable.name: tunable.default}, inplace=True)
                # External data can have incorrect types (e.g., all strings).
                df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
            # Add columns for tunables with special values.
            if tunable.special:
                (special_name, type_name) = special_param_names(tunable.name)
                tunables_names += [special_name, type_name]
                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
                df_configs[type_name] = TunableValueKind.RANGE
                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
                if tunable.type == "int":
                    # Make int column NULLABLE:
                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
                # Split the values between the "special" and regular columns so
                # that exactly one of the two is non-null per row.
                df_configs[special_name] = df_configs[tunable.name]
                df_configs.loc[~is_special, special_name] = None
                df_configs.loc[is_special, tunable.name] = None
        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs

    def suggest(self) -> TunableGroups:
        """Suggest the next configuration to try, as a new TunableGroups instance."""
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
        return tunables.assign(configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        """
        Register the result of a single trial with the optimizer.

        Parameters
        ----------
        tunables : TunableGroups
            Tunable values used in the trial.
        status : Status
            Final status of the trial.
        score : Optional[Dict[str, TunableValue]]
            Benchmark results of the trial, if any.

        Returns
        -------
        Optional[Dict[str, float]]
            The sign-adjusted (MINIMIZATION) scores, as returned by the base class.
        """
        registered_score = super().register(
            tunables,
            status,
            score,
        )  # Sign-adjusted for MINIMIZATION
        if status.is_completed():
            assert registered_score is not None
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
            # TODO: Specify (in the config) which metrics to pass to the optimizer.
            # Issue: https://github.com/microsoft/MLOS/issues/745
            self._opt.register(
                configs=df_config,
                scores=pd.DataFrame([registered_score], dtype=float),
            )
        return registered_score

    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        """
        Get the best observation so far.

        Returns
        -------
        Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]
            A pair of (scores, tunables) for the best observation,
            or (None, None) if there are no observations yet.
        """
        (df_config, df_score, _df_context) = self._opt.get_best_observations()
        if len(df_config) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
        scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
        _LOG.debug("Best observation: %s score: %s", params, scores)
        return (scores, self._tunables.copy().assign(params))