Coverage for mlos_bench/mlos_bench/optimizers/base_optimizer.py: 95%
128 statements
coverage.py v7.8.0, created at 2025-04-01 00:52 +0000
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Base class for an interface between the benchmarking framework and :py:mod:`mlos_core`
optimizers and other config suggestion methods.

See Also
--------
mlos_bench.optimizers :
    For more information on the available optimizers and their usage.
"""

import logging
from abc import ABCMeta, abstractmethod
from collections.abc import Sequence
from contextlib import AbstractContextManager as ContextManager
from types import TracebackType
from typing import Literal

from ConfigSpace import ConfigurationSpace

from mlos_bench.config.schemas import ConfigSchema
from mlos_bench.environments.status import Status
from mlos_bench.optimizers.convert_configspace import tunable_groups_to_configspace
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.tunables.tunable_types import TunableValue
from mlos_bench.util import strtobool

_LOG = logging.getLogger(__name__)


class Optimizer(ContextManager, metaclass=ABCMeta):  # pylint: disable=too-many-instance-attributes
    """An abstract interface between the benchmarking framework and :py:mod:`mlos_core`
    optimizers and other config suggestion methods.
    """

    # See Also: mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json
    BASE_SUPPORTED_CONFIG_PROPS = {
        "optimization_targets",
        "max_suggestions",
        "seed",
        "start_with_defaults",
    }

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: dict | None = None,
        service: Service | None = None,
    ):
        """
        Create a new optimizer for the given configuration space defined by the
        tunables.

        Parameters
        ----------
        tunables : TunableGroups
            The tunables to optimize.
        config : dict
            Free-format key/value pairs of configuration parameters to pass to the optimizer.
        global_config : dict | None
            Global configuration parameters shared across the framework
            (e.g., the ``experiment_id``).
        service : Service | None
            An optional service object for the optimizer to use.
        """
        _LOG.info("Create optimizer for: %s", tunables)
        _LOG.debug("Optimizer config: %s", config)
        self._validate_json_config(config)
        self._config = config.copy()
        self._global_config = global_config or {}
        self._tunables = tunables
        self._config_space: ConfigurationSpace | None = None
        self._service = service
        self._seed = int(config.get("seed", 42))
        self._in_context = False

        experiment_id = self._global_config.get("experiment_id")
        self.experiment_id = str(experiment_id).strip() if experiment_id else None

        self._iter = 0
        # If False, use the optimizer to suggest the initial configuration;
        # if True (default), use the already initialized values for the first iteration.
        self._start_with_defaults: bool = bool(
            strtobool(str(self._config.pop("start_with_defaults", True)))
        )
        self._max_suggestions = int(self._config.pop("max_suggestions", 100))

        opt_targets: dict[str, str] = self._config.pop("optimization_targets", {"score": "min"})
        self._opt_targets: dict[str, Literal[1, -1]] = {}
        for opt_target, opt_dir in opt_targets.items():
            if opt_dir == "min":
                self._opt_targets[opt_target] = 1
            elif opt_dir == "max":
                self._opt_targets[opt_target] = -1
            else:
                raise ValueError(f"Invalid optimization direction: {opt_dir} for {opt_target}")
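
    # Illustrative sketch (not from the original module): a typical "config" dict
    # accepted by this constructor, using only the BASE_SUPPORTED_CONFIG_PROPS above.
    # The specific values are hypothetical:
    #
    #     config = {
    #         "optimization_targets": {"score": "min", "throughput": "max"},
    #         "max_suggestions": 100,
    #         "seed": 42,
    #         "start_with_defaults": True,
    #     }
    #
    # With that config, self._opt_targets becomes {"score": 1, "throughput": -1},
    # i.e., "max" targets are sign-flipped so that everything is minimized internally.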

    def _validate_json_config(self, config: dict) -> None:
        """Reconstruct a basic JSON config that this class might have been instantiated
        from, in order to validate configs provided outside the file-loading
        mechanism.
        """
        json_config: dict = {
            "class": self.__class__.__module__ + "." + self.__class__.__name__,
        }
        if config:
            json_config["config"] = config
        ConfigSchema.OPTIMIZER.validate(json_config)

    def __repr__(self) -> str:
        opt_targets = ",".join(
            f"""{opt_target}:{({1: "min", -1: "max"}[opt_dir])}"""
            for (opt_target, opt_dir) in self._opt_targets.items()
        )
        return f"{self.name}({opt_targets},config={self._config})"

    def __enter__(self) -> "Optimizer":
        """Enter the optimizer's context."""
        _LOG.debug("Optimizer START :: %s", self)
        assert not self._in_context
        self._in_context = True
        return self

    def __exit__(
        self,
        ex_type: type[BaseException] | None,
        ex_val: BaseException | None,
        ex_tb: TracebackType | None,
    ) -> Literal[False]:
        """Exit the context of the optimizer."""
        if ex_val is None:
            _LOG.debug("Optimizer END :: %s", self)
        else:
            assert ex_type and ex_val
            _LOG.warning("Optimizer END :: %s", self, exc_info=(ex_type, ex_val, ex_tb))
        assert self._in_context
        self._in_context = False
        return False  # Do not suppress exceptions
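
    # Illustrative sketch (not from the original module): the context-manager
    # protocol above is how callers are expected to bracket optimizer use, e.g.:
    #
    #     with opt:                      # calls __enter__ / __exit__
    #         while opt.not_converged():
    #             tunables = opt.suggest()
    #             ...  # run the benchmark trial and register the results
    #
    # Exceptions raised inside the block are logged but not suppressed
    # (__exit__ returns False).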

    @property
    def current_iteration(self) -> int:
        """
        The current number of iterations (suggestions) registered.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._iter

    @property
    def max_suggestions(self) -> int:
        """
        The maximum number of iterations (suggestions) to run.

        Note: this may or may not be the same as the number of configurations.
        See Also: Scheduler.trial_config_repeat_count and Scheduler.max_trials.
        """
        return self._max_suggestions

    @property
    def seed(self) -> int:
        """The random seed for the optimizer."""
        return self._seed

    @property
    def start_with_defaults(self) -> bool:
        """
        Return True if the optimizer should start with the default values.

        Note: This parameter is mutable and will be reset to False after the
        defaults are first suggested.
        """
        return self._start_with_defaults

    @property
    def tunable_params(self) -> TunableGroups:
        """
        Get the tunable parameters of the optimizer as TunableGroups.

        Returns
        -------
        tunables : TunableGroups
            A collection of covariant groups of tunable parameters.
        """
        return self._tunables

    @property
    def config_space(self) -> ConfigurationSpace:
        """
        Get the tunable parameters of the optimizer as a ConfigurationSpace.

        Returns
        -------
        ConfigSpace.ConfigurationSpace
            The ConfigSpace representation of the tunable parameters.
        """
        if self._config_space is None:
            self._config_space = tunable_groups_to_configspace(self._tunables, self._seed)
            _LOG.debug("ConfigSpace: %s", self._config_space)
        return self._config_space

    @property
    def name(self) -> str:
        """
        The name of the optimizer.

        We save this information in mlos_bench storage to track the source of each
        configuration.
        """
        return self.__class__.__name__

    @property
    def targets(self) -> dict[str, Literal["min", "max"]]:
        """Returns a dictionary of optimization targets and their direction."""
        return {
            opt_target: "min" if opt_dir == 1 else "max"
            for (opt_target, opt_dir) in self._opt_targets.items()
        }

    @property
    def supports_preload(self) -> bool:
        """Return True if the optimizer supports pre-loading the data from previous
        experiments.
        """
        return True

    @abstractmethod
    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[dict[str, TunableValue] | None],
        status: Sequence[Status] | None = None,
    ) -> bool:
        """
        Pre-load the optimizer with the bulk data from previous experiments.

        Parameters
        ----------
        configs : Sequence[dict]
            Records of tunable values from other experiments.
        scores : Sequence[Optional[dict[str, TunableValue]]]
            Benchmark results from experiments that correspond to `configs`.
        status : Optional[Sequence[Status]]
            Status of the experiments that correspond to `configs`.

        Returns
        -------
        is_not_empty : bool
            True if there is data to register, False otherwise.
        """
        _LOG.info(
            "Update the optimizer with: %d configs, %d scores, %d status values",
            len(configs or []),
            len(scores or []),
            len(status or []),
        )
        if len(configs or []) != len(scores or []):
            raise ValueError("Numbers of configs and scores do not match.")
        if status is not None and len(configs or []) != len(status or []):
            raise ValueError("Numbers of configs and status values do not match.")
        has_data = bool(configs and scores)
        if has_data and self._start_with_defaults:
            _LOG.info("Prior data exists - do *NOT* use the default initialization.")
            self._start_with_defaults = False
        return has_data
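
    # Illustrative sketch (not from the original module): the expected shape of the
    # bulk_register() arguments -- three parallel sequences, one entry per prior trial.
    # The tunable names and values here are hypothetical:
    #
    #     opt.bulk_register(
    #         configs=[{"vm_size": "small"}, {"vm_size": "large"}],
    #         scores=[{"score": 88.88}, None],          # None for a failed trial
    #         status=[Status.SUCCEEDED, Status.FAILED],
    #     )
    #
    # The base implementation only checks that the lengths match and disables
    # start_with_defaults when prior data is present; subclasses do the real work.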

    def suggest(self) -> TunableGroups:
        """
        Generate the next suggestion. Base class' implementation increments the
        iteration count and returns the current values of the tunables.

        Returns
        -------
        tunables : TunableGroups
            The next configuration to benchmark.
            These are the same tunables we pass to the constructor,
            but with the values set to the next suggestion.
        """
        self._iter += 1
        _LOG.debug("Iteration %d :: Suggest", self._iter)
        return self._tunables.copy()

    @abstractmethod
    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: dict[str, TunableValue] | None = None,
    ) -> dict[str, float] | None:
        """
        Register the observation for the given configuration.

        Parameters
        ----------
        tunables : TunableGroups
            The configuration that has been benchmarked.
            Usually it's the same config that the `.suggest()` method returned.
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        score : Optional[dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        value : Optional[dict[str, float]]
            Benchmark scores extracted (and possibly transformed) from the results,
            in the form that is being MINIMIZED.
        """
        _LOG.info(
            "Iteration %d :: Register: %s = %s score: %s",
            self._iter,
            tunables,
            status,
            score,
        )
        if status.is_succeeded() == (score is None):  # XOR
            raise ValueError("Status and score must be consistent.")
        # FIXME: should maximization problems return -score values to the user,
        # or keep that as an internal nuance?
        return self._get_scores(status, score)
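
    # Illustrative sketch (not from the original module): the basic
    # suggest/register handshake a scheduler performs with an optimizer.
    # run_trial() is a hypothetical stand-in for executing the benchmark:
    #
    #     tunables = opt.suggest()
    #     status, score = run_trial(tunables)   # e.g., (Status.SUCCEEDED, {"score": 88.88})
    #     opt.register(tunables, status, score)
    #
    # Note the consistency check above: a SUCCEEDED status must come with a score,
    # and a failed one must come with score=None.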

    def _get_scores(
        self,
        status: Status,
        scores: dict[str, TunableValue] | dict[str, float] | None,
    ) -> dict[str, float] | None:
        """
        Extract scalar benchmark scores from the results and change the sign of the
        values we are maximizing.

        Parameters
        ----------
        status : Status
            Final status of the experiment (e.g., SUCCEEDED or FAILED).
        scores : Optional[dict[str, TunableValue]]
            A dict with the final benchmark results.
            None if the experiment was not successful.

        Returns
        -------
        score : Optional[dict[str, float]]
            An optional dict of benchmark scores to be used as targets for MINIMIZATION.
        """
        if not status.is_completed():
            return None

        if not status.is_succeeded():
            assert scores is None
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            return {opt_target: float("inf") for opt_target in self._opt_targets}

        assert scores is not None
        target_metrics: dict[str, float] = {}
        for opt_target, opt_dir in self._opt_targets.items():
            val = scores[opt_target]
            assert val is not None
            target_metrics[opt_target] = float(val) * opt_dir

        return target_metrics
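
    # Illustrative worked example (not from the original module): with
    # opt_targets = {"score": "min", "throughput": "max"} (internally
    # {"score": 1, "throughput": -1}) and raw results {"score": 3.5, "throughput": 120},
    # _get_scores() returns {"score": 3.5, "throughput": -120.0}, so both targets
    # can be minimized; a failed trial instead yields +inf for every target.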

    def not_converged(self) -> bool:
        """
        Return True if not converged, False otherwise.

        Base implementation just checks the iteration count.
        """
        return self._iter < self._max_suggestions

    @abstractmethod
    def get_best_observation(
        self,
    ) -> tuple[dict[str, float], TunableGroups] | tuple[None, None]:
        """
        Get the best observation so far.

        Returns
        -------
        (value, tunables) : tuple[dict[str, float], TunableGroups]
            The best value and the corresponding configuration.
            (None, None) if no successful observation has been registered yet.
        """