Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98%

108 statements  

coverage.py v7.6.7, created at 2024-11-22 01:18 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""A wrapper for mlos_core optimizers for mlos_bench."""

import logging
import os
from types import TracebackType
from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union

import pandas as pd

from mlos_bench.environments.status import Status
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.optimizers.convert_configspace import (
    TunableValueKind,
    configspace_data_to_tunable_values,
    special_param_names,
)
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_core.optimizers import (
    DEFAULT_OPTIMIZER_TYPE,
    BaseOptimizer,
    OptimizerFactory,
    OptimizerType,
    SpaceAdapterType,
)

_LOG = logging.getLogger(__name__)
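
# An illustrative sketch (not from the original file; values hypothetical) of
# the kind of optimizer config dict that ``MlosCoreOptimizer.__init__`` below
# consumes. The keys shown are the ones read or popped in ``__init__``; any
# remaining entries are forwarded to the underlying mlos_core optimizer as
# keyword arguments:
#
#     {
#         "optimizer_type": "SMAC",             # resolved via getattr(OptimizerType, ...)
#         "output_directory": "./smac_output",  # made absolute for SMAC
#         "max_trials": 100,                    # must be >= max_suggestions
#         "run_name": "my_experiment",          # defaults to the experiment_id
#     }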

class MlosCoreOptimizer(Optimizer):
    """A wrapper class for the mlos_core optimizers."""

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        super().__init__(tunables, config, global_config, service)

        opt_type = getattr(
            OptimizerType, self._config.pop("optimizer_type", DEFAULT_OPTIMIZER_TYPE.name)
        )

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get("output_directory")
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config["output_directory"] = os.path.abspath(output_directory)
            else:
                _LOG.warning(
                    "SMAC optimizer output_directory was null. "
                    "SMAC will use a temporary directory."
                )

            # Make sure max_trials >= max_suggestions.
            if "max_trials" not in self._config:
                self._config["max_trials"] = self._max_suggestions
            assert int(self._config["max_trials"]) >= self._max_suggestions, (
                f"max_trials {self._config.get('max_trials')} "
                f"< max_suggestions {self._max_suggestions}"
            )

            if "run_name" not in self._config and self.experiment_id:
                self._config["run_name"] = self.experiment_id

        space_adapter_type = self._config.pop("space_adapter_type", None)
        space_adapter_config = self._config.pop("space_adapter_config", {})

        if space_adapter_type is not None:
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)

        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimization_targets=list(self._opt_targets),
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )
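
    # An illustrative sketch (not from the original file): "space_adapter_type"
    # is resolved by name against mlos_core's SpaceAdapterType enum, so a config
    # such as (values hypothetical):
    #
    #     "space_adapter_type": "LLAMATUNE",
    #     "space_adapter_config": {"num_low_dims": 2},
    #
    # would wrap the optimizer in a LlamaTune low-dimensional space adapter.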

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"

    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:

        if not super().bulk_register(configs, scores, status):
            return False

        df_configs = self._to_df(configs)  # Impute missing values, if necessary

        df_scores = self._adjust_signs_df(
            pd.DataFrame([{} if score is None else score for score in scores])
        )

        if status is not None:
            # Select only the completed trials, set scores for failed trials to +inf.
            df_status = pd.Series(status)
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]

        # TODO: Specify (in the config) which metrics to pass to the optimizer.
        # Issue: https://github.com/microsoft/MLOS/issues/745
        self._opt.register(configs=df_configs, scores=df_scores)

        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up END: %s :: %s", self, score)

        return True
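
    # An illustrative usage sketch (not from the original file; data
    # hypothetical): warm-starting from three past trials. The FAILED trial's
    # scores are overwritten with +inf and any non-completed trials are dropped
    # before registration:
    #
    #     opt.bulk_register(
    #         configs=[{"vm_size": 2}, {"vm_size": 4}, {"vm_size": 8}],
    #         scores=[{"score": 0.9}, None, {"score": 0.7}],
    #         status=[Status.SUCCEEDED, Status.FAILED, Status.SUCCEEDED],
    #     )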

    def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
        """Coerce optimization target scores to floats and adjust their signs for a
        MINIMIZATION problem.
        """
        df_targets = df_scores[list(self._opt_targets)]
        try:
            return df_targets.astype(float) * self._opt_targets.values()
        except ValueError as ex:
            _LOG.error(
                "Some score values cannot be converted to float - check the data ::\n%s",
                df_targets,
                exc_info=True,
            )
            raise ValueError("Some score values cannot be converted to float") from ex
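
    # An illustrative sketch (not from the original file), assuming the base
    # class's usual convention of mapping each target to a sign multiplier
    # (+1 == minimize, -1 == maximize): with _opt_targets
    # {"score": 1, "throughput": -1}, a raw row {"score": 0.9, "throughput": 250}
    # becomes {"score": 0.9, "throughput": -250.0}, so the underlying mlos_core
    # optimizer can always minimize.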

    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and impute
        default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
        """
        df_configs = pd.DataFrame(configs)
        tunables_names = list(self._tunables.get_param_values().keys())
        missing_cols = set(tunables_names).difference(df_configs.columns)
        for tunable, _group in self._tunables:
            if tunable.name in missing_cols:
                df_configs[tunable.name] = tunable.default
            else:
                df_configs.fillna({tunable.name: tunable.default}, inplace=True)
                # External data can have incorrect types (e.g., all strings).
                df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
            # Add columns for tunables with special values.
            if tunable.special:
                (special_name, type_name) = special_param_names(tunable.name)
                tunables_names += [special_name, type_name]
                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
                df_configs[type_name] = TunableValueKind.RANGE
                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
                if tunable.type == "int":
                    # Make int column NULLABLE:
                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
                df_configs[special_name] = df_configs[tunable.name]
                df_configs.loc[~is_special, special_name] = None
                df_configs.loc[is_special, tunable.name] = None
        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs
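
    # An illustrative sketch (not from the original file; tunable and derived
    # column names hypothetical): given an int tunable "vm_size" with default 2
    # and special values [-1], a row {"vm_size": -1} splits into a nullable-int
    # range column plus the two extra columns from special_param_names(),
    # roughly:
    #
    #     vm_size   vm_size!special   vm_size!type
    #     <NA>      -1                "special"
    #
    # while a row missing "vm_size" entirely gets the default 2 imputed.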

    def suggest(self) -> TunableGroups:
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
        return tunables.assign(configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        registered_score = super().register(
            tunables,
            status,
            score,
        )  # Sign-adjusted for MINIMIZATION
        if status.is_completed():
            assert registered_score is not None
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
            # TODO: Specify (in the config) which metrics to pass to the optimizer.
            # Issue: https://github.com/microsoft/MLOS/issues/745
            self._opt.register(
                configs=df_config,
                scores=pd.DataFrame([registered_score], dtype=float),
            )
        return registered_score
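
    # An illustrative usage sketch (not from the original file; run_trial is a
    # hypothetical helper, and not_converged() is assumed from the Optimizer
    # base class): the scheduler-side loop pairs suggest() with register(),
    # using the context manager so _opt.cleanup() runs on exit:
    #
    #     with MlosCoreOptimizer(tunables, config) as opt:
    #         while opt.not_converged():
    #             suggestion = opt.suggest()
    #             status, score = run_trial(suggestion)
    #             opt.register(suggestion, status, score)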

    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        (df_config, df_score, _df_context) = self._opt.get_best_observations()
        if len(df_config) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
        scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
        _LOG.debug("Best observation: %s score: %s", params, scores)
        return (scores, self._tunables.copy().assign(params))
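
    # An illustrative sketch (not from the original file; values hypothetical):
    # after some completed trials, get_best_observation() might return
    #     ({"score": 0.82}, <TunableGroups with the best params assigned>)
    # whereas with no observations it returns (None, None).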