Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 94%

126 statements  


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base class for an interface between the benchmarking framework and mlos_core
optimizers.
"""

import logging
from abc import ABCMeta, abstractmethod
from types import TracebackType
from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.util import strtobool

_LOG = logging.getLogger(__name__)


class Optimizer(metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and mlos_core
    optimizers.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }
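
    # A minimal sketch (illustrative only, not validated here) of the "config"
    # section of an optimizer JSON config using the base-supported properties
    # above; the values shown match the fallback defaults applied in __init__:
    #
    #   "config": {
    #       "optimization_targets": {"score": "min"},
    #       "max_suggestions": 100,
    #       "seed": 42,
    #       "start_with_defaults": true
    #   }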

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : Optional[dict]
            Global configuration parameters shared across the framework
            (e.g., the `experiment_id`).
        service : Optional[Service]
            An optional Service that provides additional functionality to the optimizer.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: Optional[ConfigurationSpace] = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: Dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: Dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")
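
    # Sign convention used above (an illustrative walk-through with two
    # hypothetical metrics):
    #
    #   "optimization_targets": {"latency": "min", "throughput": "max"}
    #
    # becomes
    #
    #   self._opt_targets == {"latency": 1, "throughput": -1}
    #
    # so that multiplying a raw score by its sign (see _get_scores below)
    # always yields a value to be MINIMIZED.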

    def _validate_json_config(self, config: dict) -> None:
        """Reconstructs a basic json config that this class might have been instantiated
        from in order to validate configs provided outside the file loading
        mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"""{opt_target}:{({1: "min", -1: "max"}[opt_dir])}"""
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions
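
    # Typical driver-loop usage of the context manager API above (a minimal
    # sketch; `opt` is a concrete Optimizer subclass instance and `run_trial`
    # is a hypothetical helper that runs a benchmark and returns a (Status,
    # score dict) pair):
    #
    #   with opt as optimizer:
    #       while optimizer.not_converged():
    #           tunables = optimizer.suggest()
    #           status, score = run_trial(tunables)
    #           optimizer.register(tunables, status, score)
    #       best_score, best_config = optimizer.get_best_observation()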

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> Dict[str, Literal["min", "max"]]:
        """Returns a dictionary of optimization targets and their direction."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[Dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data
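
    # A minimal sketch (an assumption about typical subclass behavior, not
    # enforced here): concrete optimizers can invoke this base implementation
    # first to validate the inputs and update the start_with_defaults flag:
    #
    #   def bulk_register(self, configs, scores, status=None) -> bool:
    #       if not super().bulk_register(configs, scores, status):
    #           return False
    #       ...  # feed (configs, scores) into the underlying optimizer
    #       return True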

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. Base class' implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : Optional[Dict[str, float]]
            Benchmark scores extracted (and possibly transformed) from the results,
            in the form that is being MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        return self._get_scores(status, score)

    def _get_scores(
        self,
        status: Status,
        scores: Optional[Union[Dict[str, TunableValue], Dict[str, float]]],
    ) -> Optional[Dict[str, float]]:
        """
        Extract scalar benchmark scores from the results and change the sign of the
        ones we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : Optional[Dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : Optional[Dict[str, float]]
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: Dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics
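
    # Worked example for _get_scores (illustrative only, reusing the
    # hypothetical targets from the sketch after __init__ above):
    #
    #   self._opt_targets == {"latency": 1, "throughput": -1}
    #   scores == {"latency": 10.0, "throughput": 200.0}
    #   => returns {"latency": 10.0, "throughput": -200.0}  # both to be minimized
    #
    # A failed (but completed) trial instead maps every target to float("inf").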

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : Tuple[Dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """
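

# A purely illustrative sketch (not part of mlos_bench) of how a concrete subclass
# might implement the abstract methods above; `_best_score` and `_best_config` are
# hypothetical instance attributes it would initialize in its own __init__:
#
#   class BestTrackingOptimizer(Optimizer):
#       """Keeps only the best (minimized) observation seen so far."""
#
#       def bulk_register(self, configs, scores, status=None) -> bool:
#           # Reuse the base implementation for input validation only.
#           return super().bulk_register(configs, scores, status)
#
#       def register(self, tunables, status, score=None):
#           min_scores = super().register(tunables, status, score)
#           if min_scores is not None and (
#               self._best_score is None
#               or sum(min_scores.values()) < sum(self._best_score.values())
#           ):
#               self._best_score, self._best_config = min_scores, tunables.copy()
#           return min_scores
#
#       def get_best_observation(self):
#           if self._best_score is None:
#               return (None, None)
#           return (self._best_score, self._best_config)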