Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 99%

119 statements  

« prev     ^ index     » next       coverage.py v7.6.7, created at 2024-11-22 01:18 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Contains the :py:class:`.BaseOptimizer` abstract class.""" 

6 

7import collections 

8from abc import ABCMeta, abstractmethod 

9from typing import List, Optional, Tuple, Union 

10 

11import ConfigSpace 

12import numpy as np 

13import numpy.typing as npt 

14import pandas as pd 

15 

16from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter 

17from mlos_core.util import config_to_dataframe 

18 

19 

class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface:
    :py:meth:`~.BaseOptimizer.suggest`,
    :py:meth:`~.BaseOptimizer.register`,
    """

    # pylint: disable=too-many-instance-attributes

27 

28 def __init__( 

29 self, 

30 *, 

31 parameter_space: ConfigSpace.ConfigurationSpace, 

32 optimization_targets: List[str], 

33 objective_weights: Optional[List[float]] = None, 

34 space_adapter: Optional[BaseSpaceAdapter] = None, 

35 ): 

36 """ 

37 Create a new instance of the base optimizer. 

38 

39 Parameters 

40 ---------- 

41 parameter_space : ConfigSpace.ConfigurationSpace 

42 The parameter space to optimize. 

43 optimization_targets : List[str] 

44 The names of the optimization targets to minimize. 

45 To maximize a target, use the negative of the target when registering scores. 

46 objective_weights : Optional[List[float]] 

47 Optional list of weights of optimization targets. 

48 space_adapter : BaseSpaceAdapter 

49 The space adapter class to employ for parameter space transformations. 

50 """ 

51 self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space 

52 """The parameter space to optimize.""" 

53 

54 self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = ( 

55 parameter_space if space_adapter is None else space_adapter.target_parameter_space 

56 ) 

57 """ 

58 The parameter space actually used by the optimizer. 

59 

60 (in case a :py:mod:`SpaceAdapter <mlos_core.spaces.adapters>` is used) 

61 """ 

62 

63 if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space: 

64 raise ValueError("Given parameter space differs from the one given to space adapter") 

65 

66 self._optimization_targets = optimization_targets 

67 self._objective_weights = objective_weights 

68 if objective_weights is not None and len(objective_weights) != len(optimization_targets): 

69 raise ValueError("Number of weights must match the number of optimization targets") 

70 

71 self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter 

72 self._observations: List[Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]] = [] 

73 self._has_context: Optional[bool] = None 

74 self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = [] 

75 

76 def __repr__(self) -> str: 

77 return f"{self.__class__.__name__}(space_adapter={self.space_adapter})" 

78 

79 @property 

80 def space_adapter(self) -> Optional[BaseSpaceAdapter]: 

81 """Get the space adapter instance (if any).""" 

82 return self._space_adapter 

83 

84 def register( 

85 self, 

86 *, 

87 configs: pd.DataFrame, 

88 scores: pd.DataFrame, 

89 context: Optional[pd.DataFrame] = None, 

90 metadata: Optional[pd.DataFrame] = None, 

91 ) -> None: 

92 """ 

93 Wrapper method, which employs the space adapter (if any), before registering the 

94 configs and scores. 

95 

96 Parameters 

97 ---------- 

98 configs : pandas.DataFrame 

99 Dataframe of configs / parameters. The columns are parameter names and 

100 the rows are the configs. 

101 scores : pandas.DataFrame 

102 Scores from running the configs. The index is the same as the index of the configs. 

103 

104 context : pandas.DataFrame 

105 Not Yet Implemented. 

106 

107 metadata : Optional[pandas.DataFrame] 

108 Metadata returned by the backend optimizer's suggest method. 

109 """ 

110 # Do some input validation. 

111 assert metadata is None or isinstance(metadata, pd.DataFrame) 

112 assert set(scores.columns) == set( 

113 self._optimization_targets 

114 ), "Mismatched optimization targets." 

115 assert self._has_context is None or self._has_context ^ ( 

116 context is None 

117 ), "Context must always be added or never be added." 

118 assert len(configs) == len(scores), "Mismatched number of configs and scores." 

119 if context is not None: 

120 assert len(configs) == len(context), "Mismatched number of configs and context." 

121 assert configs.shape[1] == len( 

122 self.parameter_space.values() 

123 ), "Mismatched configuration shape." 

124 self._observations.append((configs, scores, context)) 

125 self._has_context = context is not None 

126 

127 if self._space_adapter: 

128 configs = self._space_adapter.inverse_transform(configs) 

129 assert configs.shape[1] == len( 

130 self.optimizer_parameter_space.values() 

131 ), "Mismatched configuration shape after inverse transform." 

132 return self._register(configs=configs, scores=scores, context=context) 

133 

    @abstractmethod
    def _register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs and scores.

        Backend-specific implementations provide the actual registration logic.

        Parameters
        ----------
        configs : pandas.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        scores : pandas.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.

        context : pandas.DataFrame
            Not Yet Implemented.

        metadata : Optional[pandas.DataFrame]
            Metadata returned by the backend optimizer's suggest method, if any.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

158 

159 def suggest( 

160 self, 

161 *, 

162 context: Optional[pd.DataFrame] = None, 

163 defaults: bool = False, 

164 ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]: 

165 """ 

166 Wrapper method, which employs the space adapter (if any), after suggesting a new 

167 configuration. 

168 

169 Parameters 

170 ---------- 

171 context : pandas.DataFrame 

172 Not Yet Implemented. 

173 defaults : bool 

174 Whether or not to return the default config instead of an optimizer guided one. 

175 By default, use the one from the optimizer. 

176 

177 Returns 

178 ------- 

179 configuration : pandas.DataFrame 

180 Pandas dataframe with a single row. Column names are the parameter names. 

181 

182 metadata : Optional[pandas.DataFrame] 

183 The metadata associated with the given configuration used for evaluations. 

184 Backend optimizer specific. 

185 """ 

186 if defaults: 

187 configuration = config_to_dataframe(self.parameter_space.get_default_configuration()) 

188 metadata = None 

189 if self.space_adapter is not None: 

190 configuration = self.space_adapter.inverse_transform(configuration) 

191 else: 

192 configuration, metadata = self._suggest(context=context) 

193 assert len(configuration) == 1, "Suggest must return a single configuration." 

194 assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), ( 

195 "Optimizer suggested a configuration that does " 

196 "not match the expected parameter space." 

197 ) 

198 if self._space_adapter: 

199 configuration = self._space_adapter.transform(configuration) 

200 assert set(configuration.columns).issubset(set(self.parameter_space)), ( 

201 "Space adapter produced a configuration that does " 

202 "not match the expected parameter space." 

203 ) 

204 return configuration, metadata 

205 

    @abstractmethod
    def _suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Suggests a new configuration.

        Backend-specific implementations provide the actual suggestion logic.

        Parameters
        ----------
        context : pandas.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pandas.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        metadata : Optional[pandas.DataFrame]
            The metadata associated with the given configuration used for evaluations.
            Backend optimizer specific.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

230 

    @abstractmethod
    def register_pending(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs as "pending". That is to say, they have been
        suggested by the optimizer, and an experiment trial has been started. This can
        be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        configs : pandas.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.
        context : pandas.DataFrame
            Not Yet Implemented.
        metadata : Optional[pandas.DataFrame]
            Metadata returned by the backend optimizer's suggest method.
        """
        pass  # pylint: disable=unnecessary-pass # pragma: no cover

255 

256 def get_observations(self) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: 

257 """ 

258 Returns the observations as a triplet of DataFrames (config, score, context). 

259 

260 Returns 

261 ------- 

262 observations : Tuple[pandas.DataFrame, pandas.DataFrame, Optional[pandas.DataFrame]] 

263 A triplet of (config, score, context) DataFrames of observations. 

264 """ 

265 if len(self._observations) == 0: 

266 raise ValueError("No observations registered yet.") 

267 configs = pd.concat([config for config, _, _ in self._observations]).reset_index(drop=True) 

268 scores = pd.concat([score for _, score, _ in self._observations]).reset_index(drop=True) 

269 contexts = pd.concat( 

270 [ 

271 pd.DataFrame() if context is None else context 

272 for _, _, context in self._observations 

273 ] 

274 ).reset_index(drop=True) 

275 return (configs, scores, contexts if len(contexts.columns) > 0 else None) 

276 

277 def get_best_observations( 

278 self, 

279 *, 

280 n_max: int = 1, 

281 ) -> Tuple[pd.DataFrame, pd.DataFrame, Optional[pd.DataFrame]]: 

282 """ 

283 Get the N best observations so far as a triplet of DataFrames (config, score, 

284 context). Default is N=1. The columns are ordered in ASCENDING order of the 

285 optimization targets. The function uses `pandas.DataFrame.nsmallest(..., 

286 keep="first")` method under the hood. 

287 

288 Parameters 

289 ---------- 

290 n_max : int 

291 Maximum number of best observations to return. Default is 1. 

292 

293 Returns 

294 ------- 

295 observations : Tuple[pandas.DataFrame, pandas.DataFrame, Optional[pandas.DataFrame]] 

296 A triplet of best (config, score, context) DataFrames of best observations. 

297 """ 

298 if len(self._observations) == 0: 

299 raise ValueError("No observations registered yet.") 

300 (configs, scores, contexts) = self.get_observations() 

301 idx = scores.nsmallest(n_max, columns=self._optimization_targets, keep="first").index 

302 return (configs.loc[idx], scores.loc[idx], None if contexts is None else contexts.loc[idx]) 

303 

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use.

        Default is a no-op. Redefine this method in optimizers that require cleanup.
        """

311 

312 def _from_1hot(self, *, config: npt.NDArray) -> pd.DataFrame: 

313 """Convert numpy array from one-hot encoding to a DataFrame with categoricals 

314 and ints in proper columns. 

315 """ 

316 df_dict = collections.defaultdict(list) 

317 for i in range(config.shape[0]): 

318 j = 0 

319 for param in self.optimizer_parameter_space.values(): 

320 if isinstance(param, ConfigSpace.CategoricalHyperparameter): 

321 for offset, val in enumerate(param.choices): 

322 if config[i][j + offset] == 1: 

323 df_dict[param.name].append(val) 

324 break 

325 j += len(param.choices) 

326 else: 

327 val = config[i][j] 

328 if isinstance(param, ConfigSpace.UniformIntegerHyperparameter): 

329 val = int(val) 

330 df_dict[param.name].append(val) 

331 j += 1 

332 return pd.DataFrame(df_dict) 

333 

334 def _to_1hot(self, *, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray: 

335 """Convert pandas DataFrame to one-hot-encoded numpy array.""" 

336 n_cols = 0 

337 n_rows = config.shape[0] if config.ndim > 1 else 1 

338 for param in self.optimizer_parameter_space.values(): 

339 if isinstance(param, ConfigSpace.CategoricalHyperparameter): 

340 n_cols += len(param.choices) 

341 else: 

342 n_cols += 1 

343 one_hot = np.zeros((n_rows, n_cols), dtype=np.float32) 

344 for i in range(n_rows): 

345 j = 0 

346 for param in self.optimizer_parameter_space.values(): 

347 if config.ndim > 1: 

348 assert isinstance(config, pd.DataFrame) 

349 col = config.columns.get_loc(param.name) 

350 assert isinstance(col, int) 

351 val = config.iloc[i, col] 

352 else: 

353 assert isinstance(config, pd.Series) 

354 col = config.index.get_loc(param.name) 

355 assert isinstance(col, int) 

356 val = config.iloc[col] 

357 if isinstance(param, ConfigSpace.CategoricalHyperparameter): 

358 offset = param.choices.index(val) 

359 one_hot[i][j + offset] = 1 

360 j += len(param.choices) 

361 else: 

362 one_hot[i][j] = val 

363 j += 1 

364 return one_hot