Coverage for mlos_core/mlos_core/optimizers/optimizer.py: 99%

115 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-06 00:35 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Contains the BaseOptimizer abstract class. 

7""" 

8 

9import collections 

10from abc import ABCMeta, abstractmethod 

11from typing import List, Optional, Tuple, Union 

12 

13import ConfigSpace 

14import numpy as np 

15import numpy.typing as npt 

16import pandas as pd 

17 

18from mlos_core.util import config_to_dataframe 

19from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter 

20 

21 

class BaseOptimizer(metaclass=ABCMeta):
    """
    Optimizer abstract base class defining the basic interface.

    Subclasses implement `_register`, `_suggest` and `register_pending`.
    The public `register` and `suggest` methods wrap the first two: they
    validate their inputs/outputs and apply the optional space adapter.
    """

    def __init__(self, *,
                 parameter_space: ConfigSpace.ConfigurationSpace,
                 space_adapter: Optional[BaseSpaceAdapter] = None):
        """
        Create a new instance of the base optimizer.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.
        space_adapter : BaseSpaceAdapter
            The space adapter class to employ for parameter space transformations.

        Raises
        ------
        ValueError
            If `space_adapter` was created for a parameter space other than `parameter_space`.
        """
        self.parameter_space: ConfigSpace.ConfigurationSpace = parameter_space
        # The optimizer itself works in the adapter's target space (if an adapter is given).
        self.optimizer_parameter_space: ConfigSpace.ConfigurationSpace = \
            parameter_space if space_adapter is None else space_adapter.target_parameter_space

        if space_adapter is not None and space_adapter.orig_parameter_space != parameter_space:
            raise ValueError("Given parameter space differs from the one given to space adapter")

        self._space_adapter: Optional[BaseSpaceAdapter] = space_adapter
        # One (configurations, scores, context) tuple per `register` call.
        self._observations: List[Tuple[pd.DataFrame, pd.Series, Optional[pd.DataFrame]]] = []
        # None until the first `register` call; afterwards records whether context is in use.
        self._has_context: Optional[bool] = None
        self._pending_observations: List[Tuple[pd.DataFrame, Optional[pd.DataFrame]]] = []

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(space_adapter={self.space_adapter})"

    @property
    def space_adapter(self) -> Optional[BaseSpaceAdapter]:
        """Get the space adapter instance (if any)."""
        return self._space_adapter

    def register(self, configurations: pd.DataFrame, scores: pd.Series,
                 context: Optional[pd.DataFrame] = None) -> None:
        """Wrapper method, which employs the space adapter (if any), before registering the configurations and scores.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.

        Raises
        ------
        AssertionError
            If the inputs have mismatched lengths or shapes, or if context is
            supplied on some calls but not on others.
        """
        # Do some input validation.
        # Once the first call has fixed whether context is used, every later call must agree.
        assert self._has_context is None or self._has_context == (context is not None), \
            "Context must always be added or never be added."
        assert len(configurations) == len(scores), \
            "Mismatched number of configurations and scores."
        if context is not None:
            assert len(configurations) == len(context), \
                "Mismatched number of configurations and context."
        assert configurations.shape[1] == len(self.parameter_space.values()), \
            "Mismatched configuration shape."
        # Observations are stored as given, i.e. before the space adapter is applied.
        self._observations.append((configurations, scores, context))
        self._has_context = context is not None

        if self._space_adapter:
            configurations = self._space_adapter.inverse_transform(configurations)
            assert configurations.shape[1] == len(self.optimizer_parameter_space.values()), \
                "Mismatched configuration shape after inverse transform."
        return self._register(configurations, scores, context)

    @abstractmethod
    def _register(self, configurations: pd.DataFrame, scores: pd.Series,
                  context: Optional[pd.DataFrame] = None) -> None:
        """Registers the given configurations and scores.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : pd.DataFrame
            Not Yet Implemented.
        """
        pass    # pylint: disable=unnecessary-pass # pragma: no cover

    def suggest(self, context: Optional[pd.DataFrame] = None, defaults: bool = False) -> pd.DataFrame:
        """
        Wrapper method, which employs the space adapter (if any), after suggesting a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.
        defaults : bool
            Whether or not to return the default config instead of an optimizer guided one.
            By default, use the one from the optimizer.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        Raises
        ------
        AssertionError
            If the suggestion is not a single row or its columns fall outside
            the expected parameter space.
        """
        if defaults:
            configuration = config_to_dataframe(self.parameter_space.get_default_configuration())
            if self.space_adapter is not None:
                # Bring the default config into the optimizer's space so that both
                # branches share the validation and forward transform below.
                configuration = self.space_adapter.inverse_transform(configuration)
        else:
            configuration = self._suggest(context)
            assert len(configuration) == 1, \
                "Suggest must return a single configuration."
            assert set(configuration.columns).issubset(set(self.optimizer_parameter_space)), \
                "Optimizer suggested a configuration that does not match the expected parameter space."
        if self._space_adapter:
            configuration = self._space_adapter.transform(configuration)
            assert set(configuration.columns).issubset(set(self.parameter_space)), \
                "Space adapter produced a configuration that does not match the expected parameter space."
        return configuration

    @abstractmethod
    def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Suggests a new configuration.

        Parameters
        ----------
        context : pd.DataFrame
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.
        """
        pass    # pylint: disable=unnecessary-pass # pragma: no cover

    @abstractmethod
    def register_pending(self, configurations: pd.DataFrame,
                         context: Optional[pd.DataFrame] = None) -> None:
        """Registers the given configurations as "pending".
        That is to say, it has been suggested by the optimizer, and an experiment trial has been started.
        This can be useful for executing multiple trials in parallel, retry logic, etc.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.
        context : pd.DataFrame
            Not Yet Implemented.
        """
        pass    # pylint: disable=unnecessary-pass # pragma: no cover

    def get_observations(self) -> pd.DataFrame:
        """Returns the observations as a dataframe.

        Returns
        -------
        observations : pd.DataFrame
            Dataframe of observations. The columns are parameter names and "score" for the score, each row is an observation.

        Raises
        ------
        ValueError
            If nothing has been registered yet.
        NotImplementedError
            If any registered observation carries a context.
        """
        if not self._observations:
            raise ValueError("No observations registered yet.")
        configs = pd.concat([config for config, _, _ in self._observations])
        scores = pd.concat([score for _, score, _ in self._observations])
        configs["score"] = scores
        # Check for contexts explicitly rather than relying on pd.concat([])
        # raising ValueError, which would also hide unrelated concat failures.
        if any(context is not None for _, _, context in self._observations):
            # Merging context columns into the result is not supported yet.
            raise NotImplementedError()
        return configs

    def get_best_observation(self) -> pd.DataFrame:
        """Returns the best observation so far as a dataframe.

        The best observation is the row with the smallest "score".

        Returns
        -------
        best_observation : pd.DataFrame
            Dataframe with a single row containing the best observation. The columns are parameter names and "score" for the score.

        Raises
        ------
        ValueError
            If nothing has been registered yet (raised by `get_observations`).
        """
        observations = self.get_observations()
        return observations.nsmallest(1, columns='score')

    def cleanup(self) -> None:
        """
        Remove temp files, release resources, etc. after use. Default is no-op.
        Redefine this method in optimizers that require cleanup.
        """

    def _from_1hot(self, config: npt.NDArray) -> pd.DataFrame:
        """
        Convert numpy array from one-hot encoding to a DataFrame
        with categoricals and ints in proper columns.

        Each row of `config` is read left to right: a categorical parameter
        occupies one column per choice, any other parameter occupies one column.

        Raises
        ------
        ValueError
            If a categorical parameter has no entry equal to 1 in some row.
        """
        df_dict = collections.defaultdict(list)
        for i in range(config.shape[0]):
            j = 0   # Column offset of the current parameter within row i.
            for param in self.optimizer_parameter_space.values():
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    for (offset, val) in enumerate(param.choices):
                        if config[i][j + offset] == 1:
                            df_dict[param.name].append(val)
                            break
                    else:
                        # Without this the columns would end up with different
                        # lengths (or be missing entirely) in the output frame.
                        raise ValueError(
                            f"Invalid one-hot encoding for parameter '{param.name}' in row {i}: "
                            "no choice is selected."
                        )
                    j += len(param.choices)
                else:
                    val = config[i][j]
                    if isinstance(param, ConfigSpace.UniformIntegerHyperparameter):
                        val = int(val)
                    df_dict[param.name].append(val)
                    j += 1
        return pd.DataFrame(df_dict)

    def _to_1hot(self, config: Union[pd.DataFrame, pd.Series]) -> npt.NDArray:
        """
        Convert pandas DataFrame to one-hot-encoded numpy array.

        A Series is treated as a single configuration (one output row).
        The result is a float32 array with one column per categorical choice
        and one column per non-categorical parameter.
        """
        n_cols = 0
        n_rows = config.shape[0] if config.ndim > 1 else 1
        for param in self.optimizer_parameter_space.values():
            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                n_cols += len(param.choices)
            else:
                n_cols += 1
        one_hot = np.zeros((n_rows, n_cols), dtype=np.float32)
        for i in range(n_rows):
            j = 0   # Column offset of the current parameter within output row i.
            for param in self.optimizer_parameter_space.values():
                if config.ndim > 1:
                    assert isinstance(config, pd.DataFrame)
                    col = config.columns.get_loc(param.name)
                    # get_loc may return a slice/mask for duplicate labels; require a unique column.
                    assert isinstance(col, int)
                    val = config.iloc[i, col]
                else:
                    assert isinstance(config, pd.Series)
                    col = config.index.get_loc(param.name)
                    assert isinstance(col, int)
                    val = config.iloc[col]
                if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                    offset = param.choices.index(val)
                    one_hot[i][j + offset] = 1
                    j += len(param.choices)
                else:
                    one_hot[i][j] = val
                    j += 1
        return one_hot