Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98% (109 statements)


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""A wrapper for mlos_core optimizers for mlos_bench."""

import logging
import os
from types import TracebackType
from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union

import pandas as pd

from mlos_bench.environments.status import Status
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.optimizers.convert_configspace import (
    TunableValueKind,
    configspace_data_to_tunable_values,
    special_param_names,
)
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_core.data_classes import Observations
from mlos_core.optimizers import (
    DEFAULT_OPTIMIZER_TYPE,
    BaseOptimizer,
    OptimizerFactory,
    OptimizerType,
    SpaceAdapterType,
)

_LOG = logging.getLogger(__name__)


class MlosCoreOptimizer(Optimizer):
    """A wrapper class for the mlos_core optimizers."""

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        super().__init__(tunables, config, global_config, service)

        opt_type = getattr(
            OptimizerType, self._config.pop("optimizer_type", DEFAULT_OPTIMIZER_TYPE.name)
        )

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get("output_directory")
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config["output_directory"] = os.path.abspath(output_directory)
            else:
                _LOG.warning(
                    "SMAC optimizer output_directory was null. "
                    "SMAC will use a temporary directory."
                )

        # Make sure max_trials >= max_suggestions.
        if "max_trials" not in self._config:
            self._config["max_trials"] = self._max_suggestions
        assert int(self._config["max_trials"]) >= self._max_suggestions, (
            f"max_trials {self._config['max_trials']} "
            f"must be >= max_suggestions {self._max_suggestions}"
        )

        if "run_name" not in self._config and self.experiment_id:
            self._config["run_name"] = self.experiment_id

        space_adapter_type = self._config.pop("space_adapter_type", None)
        space_adapter_config = self._config.pop("space_adapter_config", {})

        if space_adapter_type is not None:
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)

        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimization_targets=list(self._opt_targets),
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"

    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:

        if not super().bulk_register(configs, scores, status):
            return False

        df_configs = self._to_df(configs)  # Impute missing values, if necessary

        df_scores = self._adjust_signs_df(
            pd.DataFrame([{} if score is None else score for score in scores])
        )

        if status is not None:
            # Select only the completed trials, set scores for failed trials to +inf.
            df_status = pd.Series(status)
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]
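            # For example (illustrative): given status [SUCCEEDED, FAILED, RUNNING],
            # the FAILED trial keeps its config but its scores are replaced with
            # +inf, while the RUNNING trial is dropped entirely, since
            # Status.is_completed() is False for trials still in flight.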

        # TODO: Specify (in the config) which metrics to pass to the optimizer.
        # Issue: https://github.com/microsoft/MLOS/issues/745
        self._opt.register(observations=Observations(configs=df_configs, scores=df_scores))

        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up END: %s :: %s", self, score)

        return True

    def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
        """Coerce optimization target scores to floats and adjust their signs so that
        every target becomes a MINIMIZATION problem.
        """

        df_targets = df_scores[list(self._opt_targets)]
        try:
            return df_targets.astype(float) * self._opt_targets.values()
        except ValueError as ex:
            _LOG.error(
                "Some score values cannot be converted to float - check the data ::\n%s",
                df_targets,
                exc_info=True,
            )
            raise ValueError("Some score values cannot be converted to float") from ex

    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and impute
        default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
        """

        df_configs = pd.DataFrame(configs)
        tunables_names = list(self._tunables.get_param_values().keys())
        missing_cols = set(tunables_names).difference(df_configs.columns)
        for tunable, _group in self._tunables:
            if tunable.name in missing_cols:
                df_configs[tunable.name] = tunable.default
            else:
                df_configs.fillna({tunable.name: tunable.default}, inplace=True)
            # External data can have incorrect types (e.g., all strings).
            df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
            # Add columns for tunables with special values.
            if tunable.special:
                (special_name, type_name) = special_param_names(tunable.name)
                tunables_names += [special_name, type_name]
                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
                df_configs[type_name] = TunableValueKind.RANGE
                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
                if tunable.type == "int":
                    # Make the int column NULLABLE:
                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
                df_configs[special_name] = df_configs[tunable.name]
                df_configs.loc[~is_special, special_name] = None
                df_configs.loc[is_special, tunable.name] = None
        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs

    def suggest(self) -> TunableGroups:
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        suggestion = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, suggestion.config)
        return tunables.assign(configspace_data_to_tunable_values(suggestion.config.to_dict()))

    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        registered_score = super().register(
            tunables,
            status,
            score,
        )  # Sign-adjusted for MINIMIZATION
        if status.is_completed():
            assert registered_score is not None
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
            # TODO: Specify (in the config) which metrics to pass to the optimizer.
            # Issue: https://github.com/microsoft/MLOS/issues/745
            self._opt.register(
                observations=Observations(
                    configs=df_config,
                    scores=pd.DataFrame([registered_score], dtype=float),
                )
            )
        return registered_score
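    # A minimal sketch of the driver loop this class plugs into (illustrative;
    # `env` is a hypothetical Environment and the real scheduler does more):
    #
    #   with MlosCoreOptimizer(tunables, config) as opt:
    #       while opt.not_converged():
    #           tunables = opt.suggest()
    #           (status, _ts, score) = env.run()
    #           opt.register(tunables, status, score)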

    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        best_observations = self._opt.get_best_observations()
        if len(best_observations) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(best_observations.configs.iloc[0].to_dict())
        scores = self._adjust_signs_df(best_observations.scores).iloc[0].to_dict()
        _LOG.debug("Best observation: %s score: %s", params, scores)
        return (scores, self._tunables.copy().assign(params))
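    # Note: the stored scores were sign-adjusted for minimization at register time,
    # so passing them through _adjust_signs_df() again flips maximization targets
    # back to their original orientation; the caller sees the raw benchmark values
    # rather than the optimizer's minimization view.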