Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98%
106 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-05 00:36 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-05 00:36 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6A wrapper for mlos_core optimizers for mlos_bench.
7"""
9import logging
10import os
12from types import TracebackType
13from typing import Dict, Optional, Sequence, Tuple, Type, Union
14from typing_extensions import Literal
16import pandas as pd
18from mlos_core.optimizers import (
19 BaseOptimizer, OptimizerType, OptimizerFactory, SpaceAdapterType, DEFAULT_OPTIMIZER_TYPE
20)
22from mlos_bench.environments.status import Status
23from mlos_bench.services.base_service import Service
24from mlos_bench.tunables.tunable import TunableValue
25from mlos_bench.tunables.tunable_groups import TunableGroups
26from mlos_bench.optimizers.base_optimizer import Optimizer
28from mlos_bench.optimizers.convert_configspace import (
29 TunableValueKind,
30 configspace_data_to_tunable_values,
31 special_param_names,
32)
# Module-level logger, named after this module per project convention.
_LOG = logging.getLogger(__name__)
class MlosCoreOptimizer(Optimizer):
    """
    A wrapper class for the mlos_core optimizers.

    Translates between mlos_bench tunables/status and the DataFrame-based
    interface of mlos_core `BaseOptimizer` implementations.
    """

    def __init__(self,
                 tunables: TunableGroups,
                 config: dict,
                 global_config: Optional[dict] = None,
                 service: Optional[Service] = None):
        """
        Create a new mlos_core optimizer wrapper.

        Parameters
        ----------
        tunables : TunableGroups
            The tunable parameters to optimize over.
        config : dict
            Optimizer configuration; consumed keys include `optimizer_type`,
            `space_adapter_type`, `space_adapter_config`, and (for SMAC)
            `output_directory`, `max_trials`, `run_name`. Remaining keys are
            passed through to the mlos_core optimizer as kwargs.
        global_config : Optional[dict]
            Global configuration parameters (handled by the base class).
        service : Optional[Service]
            Parent service (handled by the base class).
        """
        super().__init__(tunables, config, global_config, service)

        # Default to DEFAULT_OPTIMIZER_TYPE if `optimizer_type` is not given.
        opt_type = getattr(OptimizerType, self._config.pop(
            'optimizer_type', DEFAULT_OPTIMIZER_TYPE.name))

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get('output_directory')
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config['output_directory'] = os.path.abspath(output_directory)
            else:
                _LOG.warning("SMAC optimizer output_directory was null. SMAC will use a temporary directory.")

            # Make sure max_trials >= max_iterations.
            if 'max_trials' not in self._config:
                self._config['max_trials'] = self._max_iter
            # NOTE: assertion fires only when max_trials is strictly less than
            # max_iterations, so the message reports `<` (was `<=`).
            assert int(self._config['max_trials']) >= self._max_iter, \
                f"max_trials {self._config.get('max_trials')} < max_iterations {self._max_iter}"

            if 'run_name' not in self._config and self.experiment_id:
                self._config['run_name'] = self.experiment_id

        space_adapter_type = self._config.pop('space_adapter_type', None)
        space_adapter_config = self._config.pop('space_adapter_config', {})

        if space_adapter_type is not None:
            # Resolve the adapter name to a SpaceAdapterType enum member.
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)

        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )

    def __exit__(self, ex_type: Optional[Type[BaseException]],
                 ex_val: Optional[BaseException],
                 ex_tb: Optional[TracebackType]) -> Literal[False]:
        # Release any resources held by the underlying mlos_core optimizer
        # before delegating to the base class (which never suppresses).
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        # e.g. "MlosCoreOptimizer:SmacOptimizer" - includes the wrapped type.
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"

    def bulk_register(self,
                      configs: Sequence[dict],
                      scores: Sequence[Optional[Dict[str, TunableValue]]],
                      status: Optional[Sequence[Status]] = None) -> bool:
        """
        Pre-load (warm-start) the optimizer with data from past trials.

        Parameters
        ----------
        configs : Sequence[dict]
            Tunable values of the past trials.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results of the past trials (None for failed ones).
        status : Optional[Sequence[Status]]
            Status of the past trials, parallel to `configs`/`scores`.

        Returns
        -------
        is_not_empty : bool
            True if there was data to register, False otherwise.
        """
        if not super().bulk_register(configs, scores, status):
            return False
        df_configs = self._to_df(configs)  # Impute missing values, if necessary
        # Flip the sign for maximization problems: mlos_core minimizes.
        df_scores = pd.Series(
            [self._extract_target(score) for score in scores],
            dtype=float) * self._opt_sign
        if status is not None:
            df_status = pd.Series(status)
            # Failed trials get the worst possible (minimization) score ...
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            # ... and trials that never completed are dropped entirely.
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]
        self._opt.register(df_configs, df_scores)
        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up end: %s = %s", self.target, score)
        return True

    def _extract_target(self, scores: Optional[Dict[str, TunableValue]]) -> Optional[TunableValue]:
        """Pull the single optimization target metric out of a results dict (None passes through)."""
        return None if scores is None else scores[self._opt_target]

    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and
        impute default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
        """
        df_configs = pd.DataFrame(configs)
        tunables_names = list(self._tunables.get_param_values().keys())
        missing_cols = set(tunables_names).difference(df_configs.columns)
        for (tunable, _group) in self._tunables:
            if tunable.name in missing_cols:
                # Column absent entirely: fill with the tunable's default.
                df_configs[tunable.name] = tunable.default
            else:
                # Column present but may have NaNs: impute per-cell defaults.
                df_configs.fillna({tunable.name: tunable.default}, inplace=True)
                # External data can have incorrect types (e.g., all strings).
                df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
            # Add columns for tunables with special values.
            if tunable.special:
                (special_name, type_name) = special_param_names(tunable.name)
                tunables_names += [special_name, type_name]
                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
                df_configs[type_name] = TunableValueKind.RANGE
                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
                if tunable.type == "int":
                    # Make int column NULLABLE:
                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
                # Split values between the "special" and regular columns:
                # each row has its value in exactly one of the two.
                df_configs[special_name] = df_configs[tunable.name]
                df_configs.loc[~is_special, special_name] = None
                df_configs.loc[is_special, tunable.name] = None
        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs

    def suggest(self) -> TunableGroups:
        """Ask the underlying optimizer for the next configuration to try."""
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        df_config = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False  # Only the first suggestion uses defaults.
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
        return tunables.assign(
            configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

    def register(self, tunables: TunableGroups, status: Status,
                 score: Optional[Union[float, dict]] = None) -> Optional[float]:
        """
        Register the result of a single trial with the underlying optimizer.

        Returns the score with `_opt_sign` applied (as computed by the base class).
        Only completed trials are forwarded to the mlos_core optimizer.
        """
        score = super().register(tunables, status, score)  # With _opt_sign applied
        if status.is_completed():
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", score, df_config)
            self._opt.register(df_config, pd.Series([score], dtype=float))
        return score

    def get_best_observation(self) -> Union[Tuple[float, TunableGroups], Tuple[None, None]]:
        """
        Return the best observation so far as a (score, tunables) pair,
        or (None, None) if there are no observations yet.
        """
        df_config = self._opt.get_best_observation()
        if len(df_config) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
        _LOG.debug("Best observation: %s", params)
        score = params.pop("score")
        assert score is not None
        score = float(score) * self._opt_sign  # mlos_core always uses the `score` column
        return (score, self._tunables.copy().assign(params))