Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%

131 statements  

coverage.py v7.5.1, created at 2024-05-05 00:36 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Base class for an interface between the benchmarking framework
and mlos_core optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from distutils.util import strtobool    # pylint: disable=deprecated-module

from types import TracebackType
from typing import Dict, Optional, Sequence, Tuple, Type, Union
from typing_extensions import Literal

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.services.base_service import Service
from mlos_bench.environments.status import Status
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace

_LOG = logging.getLogger(__name__)


class Optimizer(metaclass=ABCMeta):    # pylint: disable=too-many-instance-attributes
    """
    An abstract interface between the benchmarking framework and mlos_core optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }
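
    # A minimal sketch of a `config` dict that exercises only the base properties
    # listed above (illustrative values, matching the defaults used in __init__):
    #
    #     {
    #         "optimization_targets": {"score": "min"},
    #         "max_suggestions": 100,
    #         "seed": 42,
    #         "start_with_defaults": True,
    #     }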

    def __init__(self,
                 tunables: TunableGroups,
                 config: dict,
                 global_config: Optional[dict] = None,
                 service: Optional[Service] = None):
        """
        Create a new optimizer for the given configuration space defined by the tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : Optional[dict]
            Global configuration parameters (e.g., `experiment_id`) shared across components.
        service : Optional[Service]
            An optional parent service that the optimizer can use, if any.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: Optional[ConfigurationSpace] = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get('experiment_id')
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop('start_with_defaults', True))))
        self._max_iter = int(self._config.pop('max_suggestions', 100))

        opt_targets: Dict[str, str] = self._config.pop('optimization_targets', {'score': 'min'})
        if not isinstance(opt_targets, dict):
            raise ValueError(f"optimization_targets should be a dict: {opt_targets}")
        # TODO: Implement multi-target optimization.
        if len(opt_targets) != 1:
            raise NotImplementedError("Multi-target optimization is not implemented.")
        (self._opt_target, opt_dir) = list(opt_targets.items())[0]
        self._opt_sign = {"min": 1, "max": -1}[opt_dir]

    def _validate_json_config(self, config: dict) -> None:
        """
        Reconstruct a basic JSON config that this class might have been
        instantiated from, in order to validate configs provided outside the
        file-loading mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)
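
    # For example, a hypothetical subclass my_pkg.my_optimizer.MyOptimizer
    # constructed with config={"seed": 42} would be validated against the
    # optimizer schema as the reconstructed document:
    #
    #     {"class": "my_pkg.my_optimizer.MyOptimizer", "config": {"seed": 42}}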

    def __repr__(self) -> str:
        opt_direction = 'min' if self.is_min else 'max'
        return f"{self.name}:{opt_direction}({self.target})(config={self._config})"

    def __enter__(self) -> 'Optimizer':
        """
        Enter the optimizer's context.
        """
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(self, ex_type: Optional[Type[BaseException]],
                 ex_val: Optional[BaseException],
                 ex_tb: Optional[TracebackType]) -> Literal[False]:
        """
        Exit the context of the optimizer.
        """
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions
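
    # A minimal usage sketch, assuming `opt` is an instance of some concrete subclass
    # and the caller runs the benchmark itself (run_benchmark is a hypothetical helper):
    #
    #     with opt:
    #         while opt.not_converged():
    #             tunables = opt.suggest()
    #             status, score = run_benchmark(tunables)
    #             opt.register(tunables, status, score)
    #     (best_score, best_config) = opt.get_best_observation()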

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (trials) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Launcher.trial_config_repeat_count.
        """
        return self._iter

    @property
    def max_iterations(self) -> int:
        """
        The maximum number of iterations (trials) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Launcher.trial_config_repeat_count.
        """
        return self._max_iter

    @property
    def seed(self) -> int:
        """
        The random seed for the optimizer.
        """
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.
        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer. We save this information in
        mlos_bench storage to track the source of each configuration.
        """
        return self.__class__.__name__

    # TODO: Expand these properties for multi-objective.

    @property
    def is_min(self) -> bool:
        """
        True if minimizing, False otherwise. Minimization is the default.
        """
        return self._opt_sign > 0

    @property
    def target(self) -> str:
        """
        The name of the target metric to optimize.
        """
        return self._opt_target

    @property
    def direction(self) -> Literal['min', 'max']:
        """
        The direction to optimize the target metric (e.g., min or max).
        """
        return 'min' if self.is_min else 'max'
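
    # For example, with the default optimization_targets of {"score": "min"},
    # target == "score", is_min is True, and direction == "min"; with an
    # illustrative config of {"throughput": "max"}, direction would be "max".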

    @property
    def supports_preload(self) -> bool:
        """
        Return True if the optimizer supports pre-loading the data from previous experiments.
        """
        return True

    @abstractmethod
    def bulk_register(self,
                      configs: Sequence[dict],
                      scores: Sequence[Optional[Dict[str, TunableValue]]],
                      status: Optional[Sequence[Status]] = None) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info("Update the optimizer with: %d configs, %d scores, %d status values",
                  len(configs or []), len(scores or []), len(status or []))
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data
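
    # Sketch of the expected argument shapes (tunable names and values are
    # purely illustrative):
    #
    #     configs = [{"vm_size": "small"}, {"vm_size": "large"}]
    #     scores = [{"score": 0.85}, None]            # None for a failed trial
    #     status = [Status.SUCCEEDED, Status.FAILED]
    #     opt.bulk_register(configs, scores, status)  # returns True (has data)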

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion.
        Base class' implementation increments the iteration count
        and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(self, tunables: TunableGroups, status: Status,
                 score: Optional[Union[float, Dict[str, float]]] = None) -> Optional[float]:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[Union[float, Dict[str, float]]]
            A scalar or a dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : Optional[float]
            The scalar benchmark score extracted (and possibly transformed)
            from the benchmark results; this is the value being minimized.
        """
        _LOG.info("Iteration %d :: Register: %s = %s score: %s",
                  self._iter, tunables, status, score)
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_score(status, score)
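
    # Consistency check example: registering Status.SUCCEEDED with score=None
    # (or a failed status with a non-None score) raises ValueError; a FAILED
    # trial registered with score=None yields float("inf") via _get_score().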

    def _get_score(self, status: Status, score: Optional[Union[float, Dict[str, float]]]) -> Optional[float]:
        """
        Extract a scalar benchmark score from the benchmark results.
        Change the sign if we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[Union[float, Dict[str, float]]]
            A scalar or a dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : Optional[float]
            A scalar benchmark score to be used as a primary target for MINIMIZATION.
        """
        if not status.is_completed():
            return None
        if status.is_succeeded():
            assert score is not None
            if isinstance(score, dict):
                if self._opt_target not in score:
                    raise ValueError(f"Missing expected optimization target metric '{self._opt_target}' "
                                     + f"in results for iteration {self._iter}: {score}")
                score = score[self._opt_target]
            return float(score) * self._opt_sign
        assert score is None
        return float("inf")
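
    # Sign-flip example: with optimization_targets = {"throughput": "max"}
    # (illustrative), self._opt_sign is -1, so a successful result of
    # {"throughput": 1250.0} is registered as -1250.0, while a failed trial
    # registers as +inf, the worst possible value for minimization.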

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.
        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_iter

    @abstractmethod
    def get_best_observation(self) -> Union[Tuple[float, TunableGroups], Tuple[None, None]]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : Tuple[float, TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """