Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%

128 statements  

coverage.py v7.8.0, created at 2025-04-01 00:52 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Base class for an interface between the benchmarking framework and :py:mod:`mlos_core`
optimizers and other config suggestion methods.

See Also
--------
mlos_bench.optimizers :
    For more information on the available optimizers and their usage.
"""

import logging
from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from contextlib import AbstractContextManager as ContextManager
from types import TracebackType
from typing import Literal

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.tunables.tunable_types import TunableValue
from mlos_bench.util import strtobool

_LOG = logging.getLogger(__name__)


class Optimizer(ContextManager, metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and :py:mod:`mlos_core`
    optimizers and other config suggestion methods.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: dict | None = None,
        service: Service | None = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : dict | None
            Global configuration parameters, if any.
        service : Service | None
            Parent service that provides additional functionality to the optimizer, if any.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: ConfigurationSpace | None = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")
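
    # For example (illustrative values), a config of
    #   {"optimization_targets": {"score": "min", "throughput": "max"}}
    # yields self._opt_targets == {"score": 1, "throughput": -1}: "max" targets are negated
    # internally so that every target can be treated uniformly as a minimization objective.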

    def _validate_json_config(self, config: dict) -> None:
        """Reconstruct a basic JSON config that this class might have been instantiated
        from, so that configs provided outside the file-loading mechanism can still be
        validated against the optimizer schema.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"""{opt_target}:{({1: "min", -1: "max"}[opt_dir])}"""
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: type[BaseException] | None,
        ex_val: BaseException | None,
        ex_tb: TracebackType | None,
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions
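
    # Typical usage is via the context manager protocol (illustrative sketch):
    #
    #   with optimizer:
    #       while optimizer.not_converged():
    #           ...suggest/register...
    #
    # so that __enter__()/__exit__() bracket the whole optimization loop exactly once.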

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> dict[str, Literal["min", "max"]]:
        """Returns a dictionary of optimization targets and their direction."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[dict[str, TunableValue] | None],
        status: Sequence[Status] | None = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data
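
    # For example (hypothetical tunable names and values):
    #   bulk_register(configs=[{"vm_size": "large"}], scores=[{"score": 0.8}],
    #                 status=[Status.SUCCEEDED])
    # registers one prior observation and disables the defaults bootstrap, so the first
    # suggestion comes from the optimizer rather than the tunables' default values.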

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. Base class' implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: dict[str, TunableValue] | None = None,
    ) -> dict[str, float] | None:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : Optional[dict[str, float]]
            Benchmark scores extracted (and possibly transformed)
            from the results, expressed as values to be MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        # FIXME: Should maximization problems return -score values to the user,
        # or keep that as an internal nuance?
        return self._get_scores(status, score)

    def _get_scores(
        self,
        status: Status,
        scores: dict[str, TunableValue] | dict[str, float] | None,
    ) -> dict[str, float] | None:
        """
        Extract scalar benchmark scores from the results dict and flip their signs for
        the targets we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : Optional[dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : Optional[dict[str, float]]
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics
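
    # For example, with self._opt_targets == {"score": -1} (i.e., maximize "score"),
    # scores={"score": 10.0} is returned as {"score": -10.0}, so callers can always minimize.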

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> tuple[dict[str, float], TunableGroups] | tuple[None, None]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : tuple[dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """