Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 98%

120 statements  

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Contains the :py:class:`.BaseOptimizer` abstract class."""

import collections
from abc import ABCMeta, abstractmethod
from copy import deepcopy
from typing import List, Optional, Tuple, Union

import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd

from mlos_core.data_classes import Observation, Observations, Suggestion
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import config_to_series


class BaseOptimizer(metaclass=ABCMeta):
    """Optimizer abstract base class defining the basic interface:
    :py:meth:`~.BaseOptimizer.suggest` and
    :py:meth:`~.BaseOptimizer.register`.
    """

    # pylint: disable=too-many-instance-attributes

    def __init__(
        self,
        *,
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
    ):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        optimization_targets : List[str]
            The names of the optimization targets to minimize.
            To maximize a target, use the negative of the target when registering scores.
        objective_weights : Optional[List[float]]
            Optional list of weights of optimization targets.
        space_adapter : Optional[BaseSpaceAdapter]
            The space adapter instance to employ for parameter space transformations.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        """The parameter space to optimize."""

        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = (
            parameter_space if space_adapter is None else space_adapter.target_parameter_space
        )
        """
        The parameter space actually used by the optimizer.

        (in case a :py:mod:`SpaceAdapter <mlos_core.spaces.adapters>` is used)
        """

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._optimization_targets = optimization_targets
        self._objective_weights = objective_weights
        if objective_weights is not None and len(objective_weights) != len(optimization_targets):
            raise ValueError("Number of weights must match the number of optimization targets")

        self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter
        self._observations: Observations = Observations()
        self._has_context: Optional[bool] = None
        self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = []
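
    # Example (illustrative sketch, not part of the original file): constructing
    # a concrete optimizer.  ``MyOptimizer`` stands in for any ``BaseOptimizer``
    # subclass; the keyword arguments mirror the ``__init__`` signature above.
    #
    #     space = ConfigSpace.ConfigurationSpace(seed=1234)
    #     space.add_hyperparameter(
    #         ConfigSpace.UniformIntegerHyperparameter("x", lower=0, upper=10)
    #     )
    #     opt = MyOptimizer(
    #         parameter_space=space,
    #         optimization_targets=["latency", "cost"],
    #         objective_weights=[0.7, 0.3],  # one weight per optimization target
    #     )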

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> Optional[BaseSpaceAdapter]:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(
        self,
        observations: Union[Observation, Observations],
    ) -> None:
        """
        Register one or more observations at once.

        Parameters
        ----------
        observations : Union[Observation, Observations]
            The observation(s) to register.
        """
        if isinstance(observations, Observation):
            observations = Observations(observations=[observations])
        # Check input and transform the observations if a space adapter is present.
        observations = Observations(
            observations=[
                self._preprocess_observation(observation) for observation in observations
            ]
        )
        # Now bulk register all observations (details delegated to the underlying classes).
        self._register(observations)
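
    # Example (illustrative sketch): registering a completed trial's result.
    # ``opt`` is a concrete optimizer as above; ``Observation`` is the data
    # class imported at the top of this module.
    #
    #     suggestion = opt.suggest()
    #     score = pd.Series({"latency": 41.2, "cost": 1.5})  # index must match targets
    #     opt.register(
    #         observations=Observation(
    #             config=suggestion.config,
    #             score=score,
    #             context=suggestion.context,
    #             metadata=suggestion.metadata,
    #         )
    #     )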

    def _preprocess_observation(self, observation: Observation) -> Observation:
        """
        Wrapper method, which employs the space adapter (if any) and does some input
        validation before registering the configs and scores.

        Parameters
        ----------
        observation : Observation
            The observation to register.

        Returns
        -------
        observation : Observation
            The (possibly transformed) observation to register.
        """
        # Do some input validation.
        assert observation.metadata is None or isinstance(observation.metadata, pd.Series)
        assert set(observation.score.index) == set(
            self._optimization_targets
        ), "Mismatched optimization targets."
        assert self._has_context is None or self._has_context ^ (
            observation.context is None
        ), "Context must always be added or never be added."
        assert len(observation.config) == len(
            self.parameter_space.values()
        ), "Mismatched configuration shape."

        self._has_context = observation.context is not None
        self._observations.append(observation)

        transformed_observation = deepcopy(observation)  # Needed to support named tuples
        if self._space_adapter:
            transformed_observation = Observation(
                config=self._space_adapter.inverse_transform(transformed_observation.config),
                score=transformed_observation.score,
                context=transformed_observation.context,
                metadata=transformed_observation.metadata,
            )
            assert len(transformed_observation.config) == len(
                self.optimizer_parameter_space.values()
            ), "Mismatched configuration shape after inverse transform."
        return transformed_observation

    @abstractmethod
    def _register(
        self,
        observations: Observations,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        observations : Observations
            The set of observations to register.
        """
        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
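
    # Example (illustrative sketch): a minimal ``_register`` in a hypothetical
    # subclass that simply accumulates observations for a later model fit.
    #
    #     def _register(self, observations: Observations) -> None:
    #         for observation in observations:
    #             self._history.append(observation)  # hypothetical attribute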

    def suggest(
        self,
        *,
        context: Optional[pd.Series] = None,
        defaults: bool = False,
    ) -> Suggestion:
        """
        Wrapper method, which employs the space adapter (if any), after suggesting a new
        configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.
        defaults : bool
            Whether or not to return the default config instead of an optimizer-guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        suggestion : Suggestion
            The suggested point to evaluate.
        """
        if defaults:
            configuration = config_to_series(self.parameter_space.get_default_configuration())
            if self.space_adapter is not None:
                configuration = self.space_adapter.inverse_transform(configuration)
            suggestion = Suggestion(config=configuration, context=context, metadata=None)
        else:
            suggestion = self._suggest(context=context)
            assert set(suggestion.config.index).issubset(set(self.optimizer_parameter_space)), (
                "Optimizer suggested a configuration that does "
                "not match the expected parameter space."
            )
        if self._space_adapter:
            suggestion = Suggestion(
                config=self._space_adapter.transform(suggestion.config),
                context=suggestion.context,
                metadata=suggestion.metadata,
            )
            assert set(suggestion.config.index).issubset(set(self.parameter_space)), (
                "Space adapter produced a configuration that does "
                "not match the expected parameter space."
            )
        return suggestion
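
    # Example (illustrative sketch): seeding a run with the space's default
    # configuration before switching to optimizer-guided suggestions.
    #
    #     first = opt.suggest(defaults=True)  # the parameter space's default config
    #     nxt = opt.suggest()                 # an optimizer-guided config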

    @abstractmethod
    def _suggest(
        self,
        *,
        context: Optional[pd.Series] = None,
    ) -> Suggestion:
        """
        Suggests a new configuration.

        Parameters
        ----------
        context : pandas.Series
            Not Yet Implemented.

        Returns
        -------
        suggestion : Suggestion
            The suggestion to evaluate.
        """
        pass  # pylint: disable=unnecessary-pass  # pragma: no cover

    @abstractmethod
    def register_pending(self, pending: Suggestion) -> None:
        """
        Registers the given suggestion as "pending". That is to say, it has been
        suggested by the optimizer, and an experiment trial has been started. This can
        be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        pending : Suggestion
            The pending suggestion to register.
        """
        pass  # pylint: disable=unnecessary-pass  # pragma: no cover
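
    # Example (illustrative sketch): marking suggestions as in-flight when
    # dispatching several trials in parallel, so the optimizer can account for
    # configurations that have been handed out but not yet scored.
    #
    #     suggestions = [opt.suggest() for _ in range(4)]
    #     for suggestion in suggestions:
    #         opt.register_pending(pending=suggestion)
    #         # ... dispatch the trial to a worker; register() its score on completion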

    def get_observations(self) -> Observations:
        """
        Returns all observations registered so far as a single
        :py:class:`.Observations` object.

        Returns
        -------
        observations : Observations
            All the observations registered so far.
        """
        if len(self._observations) == 0:
            raise ValueError("No observations registered yet.")
        return self._observations

    def get_best_observations(
        self,
        n_max: int = 1,
    ) -> Observations:
        """
        Get the N best observations so far as a filtered version of Observations.
        Default is N=1. The returned observations are sorted in ASCENDING order of
        the optimization targets. This uses the
        `pandas.DataFrame.nsmallest(..., keep="first")` method under the hood.

        Parameters
        ----------
        n_max : int
            Maximum number of best observations to return. Default is 1.

        Returns
        -------
        observations : Observations
            A filtered version of Observations with the best N observations.
        """
        observations = self.get_observations()
        if len(observations) == 0:
            raise ValueError("No observations registered yet.")

        idx = observations.scores.nsmallest(
            n_max,
            columns=self._optimization_targets,
            keep="first",
        ).index
        return observations.filter_by_index(idx)
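
    # Example (illustrative sketch): inspecting the top three observations after
    # a run.  ``scores`` is the same attribute used by the filter above.
    #
    #     best = opt.get_best_observations(n_max=3)
    #     print(best.scores)  # up to 3 rows, smallest target values first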

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use.

        Default is a no-op. Redefine this method in optimizers that require cleanup.
        """

    def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
        """Convert numpy array from one-hot encoding to a DataFrame with categoricals
        and ints in proper columns.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            j = 0
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    # Categoricals occupy one column per choice; decode whichever
                    # indicator column is set.
                    for offset, val in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    j += len(param.choices)
                else:
                    # Numeric parameters occupy a single column.
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)

    def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
        """Convert pandas DataFrame or Series to a one-hot-encoded numpy array."""
        n_cols = 0
        n_rows = config.shape[0] if config.ndim > 1 else 1
        # Count output columns: one per choice for categoricals, one otherwise.
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0
            for param in self.optimizer_parameter_space.values():
                # Fetch the parameter's value from the DataFrame row or the Series.
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    # Set the indicator column corresponding to the observed choice.
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot
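
    # Example (illustrative sketch): the one-hot layout these helpers use.  For a
    # space with a categorical ``kernel`` in ("rbf", "linear") followed by an
    # integer ``x`` in [0, 10], the config {"kernel": "linear", "x": 3} encodes as
    # one float32 row:
    #
    #     [[0.0, 1.0, 3.0]]   # columns: kernel==rbf, kernel==linear, x
    #
    # ``_from_1hot`` inverts this, restoring the choice label and casting the
    # integer column back to ``int``.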