Coverage for mlos_core/mlos_core/optimizers/bayesian_optimizers/smac_optimizer.py: 88%

98 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-06 00:35 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Contains the wrapper class for SMAC Bayesian optimizers. 

7See Also: <https://automl.github.io/SMAC3/main/index.html> 

8""" 

9 

10from logging import warning 

11from pathlib import Path 

12from typing import Dict, List, Optional, Union, TYPE_CHECKING 

13from tempfile import TemporaryDirectory 

14 

15import ConfigSpace 

16import numpy.typing as npt 

17import pandas as pd 

18 

19from mlos_core.optimizers.bayesian_optimizers.bayesian_optimizer import BaseBayesianOptimizer 

20from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter 

21from mlos_core.spaces.adapters.identity_adapter import IdentityAdapter 

22 

23 

class SmacOptimizer(BaseBayesianOptimizer):
    """Wrapper class for SMAC based Bayesian optimization.

    Parameters
    ----------
    parameter_space : ConfigSpace.ConfigurationSpace
        The parameter space to optimize.

    space_adapter : BaseSpaceAdapter
        The space adapter class to employ for parameter space transformations.

    seed : Optional[int]
        By default SMAC uses a known seed (0) to keep results reproducible.
        However, if a `None` seed is explicitly provided, we let a random seed be produced by SMAC.

    run_name : Optional[str]
        Name of this run. This is used to easily distinguish across different runs.
        If set to `None` (default), SMAC will generate a hash from metadata.

    output_directory : Optional[str]
        The directory where SMAC output will be saved. If set to `None` (default), a temporary dir will be used.

    max_trials : int
        Maximum number of trials (i.e., function evaluations) to be run. Defaults to 100.
        Note that modifying this value directly affects the value of `n_random_init`, if latter is set to `None`.

    n_random_init : Optional[int]
        Number of points evaluated at start to bootstrap the optimizer.
        Default depends on max_trials and number of parameters and max_ratio.
        Note: it can sometimes be useful to set this to 1 when pre-warming the
        optimizer from historical data.
        See Also: mlos_bench.optimizer.bulk_register

    max_ratio : Optional[int]
        Maximum ratio of max_trials to be random configurations to be evaluated
        at start to bootstrap the optimizer.
        Useful if you want to explicitly control the number of random
        configurations evaluated at start.

    use_default_config: bool
        Whether to use the default config for the first trial after random initialization.

    n_random_probability: float
        Probability of choosing to evaluate a random configuration during optimization.
        Defaults to `0.1`. Setting this to a higher value favors exploration over exploitation.
    """

    def __init__(self, *,  # pylint: disable=too-many-locals
                 parameter_space: ConfigSpace.ConfigurationSpace,
                 space_adapter: Optional[BaseSpaceAdapter] = None,
                 seed: Optional[int] = 0,
                 run_name: Optional[str] = None,
                 output_directory: Optional[str] = None,
                 max_trials: int = 100,
                 n_random_init: Optional[int] = None,
                 max_ratio: Optional[float] = None,
                 use_default_config: bool = False,
                 n_random_probability: float = 0.1):

        super().__init__(
            parameter_space=parameter_space,
            space_adapter=space_adapter,
        )

        # Declare at the top because we need it in __del__/cleanup()
        self._temp_output_directory: Optional[TemporaryDirectory] = None

        # pylint: disable=import-outside-toplevel
        from smac import HyperparameterOptimizationFacade as Optimizer_Smac
        from smac import Scenario
        from smac.intensifier.abstract_intensifier import AbstractIntensifier
        from smac.main.config_selector import ConfigSelector
        from smac.random_design.probability_design import ProbabilityRandomDesign
        from smac.runhistory import TrialInfo

        # Store for TrialInfo instances returned by .ask()
        self.trial_info_map: Dict[ConfigSpace.Configuration, TrialInfo] = {}

        # The default when not specified is to use a known seed (0) to keep results reproducible.
        # However, if a `None` seed is explicitly provided, we let a random seed be produced by SMAC.
        # https://automl.github.io/SMAC3/main/api/smac.scenario.html#smac.scenario.Scenario
        seed = -1 if seed is None else seed

        # Create temporary directory for SMAC output (if none provided)
        if output_directory is None:
            # pylint: disable=consider-using-with
            try:
                self._temp_output_directory = TemporaryDirectory(ignore_cleanup_errors=True)  # Argument added in Python 3.10
            except TypeError:
                self._temp_output_directory = TemporaryDirectory()
            output_directory = self._temp_output_directory.name

        if n_random_init is not None:
            assert isinstance(n_random_init, int) and n_random_init >= 0
            if n_random_init == max_trials and use_default_config:
                # Increase max budgeted trials to account for use_default_config.
                max_trials += 1

        scenario: Scenario = Scenario(
            self.optimizer_parameter_space,
            name=run_name,
            output_directory=Path(output_directory),
            deterministic=True,
            use_default_config=use_default_config,
            n_trials=max_trials,
            # BUGFIX: previously `seed or -1`, which silently coerced an explicit
            # (and the documented default) seed of 0 to -1, causing SMAC to pick
            # a random seed and breaking reproducibility. `None` has already
            # been mapped to -1 above, so pass the seed through unchanged.
            seed=seed,
            n_workers=1,  # Use a single thread for evaluating trials
        )
        intensifier: AbstractIntensifier = Optimizer_Smac.get_intensifier(scenario, max_config_calls=1)
        config_selector: ConfigSelector = Optimizer_Smac.get_config_selector(scenario, retrain_after=1)

        # TODO: When bulk registering prior configs to rewarm the optimizer,
        # there is a way to inform SMAC's initial design that we have
        # additional_configs and can set n_configs == 0.
        # Additionally, we may want to consider encoding those values into the
        # runhistory when prewarming the optimizer so that the initial design
        # doesn't reperform random init.
        # See Also: #488

        initial_design_args: Dict[str, Union[list, int, float, Scenario]] = {
            'scenario': scenario,
            # Workaround a bug in SMAC that sets a default arg to a mutable
            # value that can cause issues when multiple optimizers are
            # instantiated with the use_default_config option within the same
            # process that use different ConfigSpaces so that the second
            # receives the default config from both as an additional config.
            'additional_configs': []
        }
        if n_random_init is not None:
            initial_design_args['n_configs'] = n_random_init
            if n_random_init > 0.25 * max_trials and max_ratio is None:
                warning(
                    'Number of random initial configurations (%d) is ' +
                    'greater than 25%% of max_trials (%d). ' +
                    'Consider setting max_ratio to avoid SMAC overriding n_random_init.',
                    n_random_init,
                    max_trials,
                )
            if max_ratio is not None:
                assert isinstance(max_ratio, float) and 0.0 <= max_ratio <= 1.0
                initial_design_args['max_ratio'] = max_ratio

        # Use the default InitialDesign from SMAC.
        # (currently SBOL instead of LatinHypercube due to better uniformity
        # for initial sampling which results in lower overall samples required)
        initial_design = Optimizer_Smac.get_initial_design(**initial_design_args)  # type: ignore[arg-type]
        # initial_design = LatinHypercubeInitialDesign(**initial_design_args)  # type: ignore[arg-type]

        # Workaround a bug in SMAC that doesn't pass the seed to the random
        # design when generated a random_design for itself via the
        # get_random_design static method when random_design is None.
        # NOTE(review): only a lower bound is asserted here; values > 1.0 are
        # accepted and left for SMAC to interpret — confirm whether an upper
        # bound of 1.0 should be enforced.
        assert isinstance(n_random_probability, float) and n_random_probability >= 0
        random_design = ProbabilityRandomDesign(probability=n_random_probability, seed=scenario.seed)

        self.base_optimizer = Optimizer_Smac(
            scenario,
            SmacOptimizer._dummy_target_func,
            initial_design=initial_design,
            intensifier=intensifier,
            random_design=random_design,
            config_selector=config_selector,
            overwrite=True,
            logging_level=False,  # Use the existing logger
        )

188 

189 def __del__(self) -> None: 

190 # Best-effort attempt to clean up, in case the user forgets to call .cleanup() 

191 self.cleanup() 

192 

193 @property 

194 def n_random_init(self) -> int: 

195 """ 

196 Gets the number of random samples to use to initialize the optimizer's search space sampling. 

197 

198 Note: This may not be equal to the value passed to the initializer, due to logic present in the SMAC. 

199 See Also: max_ratio 

200 

201 Returns 

202 ------- 

203 int 

204 The number of random samples used to initialize the optimizer's search space sampling. 

205 """ 

206 # pylint: disable=protected-access 

207 return self.base_optimizer._initial_design._n_configs 

208 

209 @staticmethod 

210 def _dummy_target_func(config: ConfigSpace.Configuration, seed: int = 0) -> None: 

211 """Dummy target function for SMAC optimizer. 

212 

213 Since we only use the ask-and-tell interface, this is never called. 

214 

215 Parameters 

216 ---------- 

217 config : ConfigSpace.Configuration 

218 Configuration to evaluate. 

219 

220 seed : int 

221 Random seed to use for the target function. Not actually used. 

222 """ 

223 # NOTE: Providing a target function when using the ask-and-tell interface is an imperfection of the API 

224 # -- this planned to be fixed in some future release: https://github.com/automl/SMAC3/issues/946 

225 raise RuntimeError('This function should never be called.') 

226 

227 def _register(self, configurations: pd.DataFrame, scores: pd.Series, context: Optional[pd.DataFrame] = None) -> None: 

228 """Registers the given configurations and scores. 

229 

230 Parameters 

231 ---------- 

232 configurations : pd.DataFrame 

233 Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. 

234 

235 scores : pd.Series 

236 Scores from running the configurations. The index is the same as the index of the configurations. 

237 

238 context : pd.DataFrame 

239 Not Yet Implemented. 

240 """ 

241 from smac.runhistory import StatusType, TrialInfo, TrialValue # pylint: disable=import-outside-toplevel 

242 

243 if context is not None: 

244 raise NotImplementedError() 

245 

246 # Register each trial (one-by-one) 

247 for config, score in zip(self._to_configspace_configs(configurations), scores.tolist()): 

248 # Retrieve previously generated TrialInfo (returned by .ask()) or create new TrialInfo instance 

249 info: TrialInfo = self.trial_info_map.get(config, TrialInfo(config=config, seed=self.base_optimizer.scenario.seed)) 

250 value: TrialValue = TrialValue(cost=score, time=0.0, status=StatusType.SUCCESS) 

251 self.base_optimizer.tell(info, value, save=False) 

252 

253 # Save optimizer once we register all configs 

254 self.base_optimizer.optimizer.save() 

255 

256 def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame: 

257 """Suggests a new configuration. 

258 

259 Parameters 

260 ---------- 

261 context : pd.DataFrame 

262 Not Yet Implemented. 

263 

264 Returns 

265 ------- 

266 configuration : pd.DataFrame 

267 Pandas dataframe with a single row. Column names are the parameter names. 

268 """ 

269 if TYPE_CHECKING: 

270 from smac.runhistory import TrialInfo # pylint: disable=import-outside-toplevel 

271 

272 if context is not None: 

273 raise NotImplementedError() 

274 

275 trial: TrialInfo = self.base_optimizer.ask() 

276 trial.config.is_valid_configuration() 

277 self.optimizer_parameter_space.check_configuration(trial.config) 

278 assert trial.config.config_space == self.optimizer_parameter_space 

279 self.trial_info_map[trial.config] = trial 

280 config_df = pd.DataFrame([trial.config], columns=list(self.optimizer_parameter_space.keys())) 

281 return config_df 

282 

283 def register_pending(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> None: 

284 raise NotImplementedError() 

285 

286 def surrogate_predict(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: 

287 from smac.utils.configspace import convert_configurations_to_array # pylint: disable=import-outside-toplevel 

288 

289 if context is not None: 

290 raise NotImplementedError() 

291 if self._space_adapter and not isinstance(self._space_adapter, IdentityAdapter): 

292 raise NotImplementedError() 

293 

294 # pylint: disable=protected-access 

295 if len(self._observations) <= self.base_optimizer._initial_design._n_configs: 

296 raise RuntimeError( 

297 'Surrogate model can make predictions *only* after all initial points have been evaluated ' + 

298 f'{len(self._observations)} <= {self.base_optimizer._initial_design._n_configs}') 

299 if self.base_optimizer._config_selector._model is None: 

300 raise RuntimeError('Surrogate model is not yet trained') 

301 

302 configs: npt.NDArray = convert_configurations_to_array(self._to_configspace_configs(configurations)) 

303 mean_predictions, _ = self.base_optimizer._config_selector._model.predict(configs) 

304 return mean_predictions.reshape(-1,) 

305 

306 def acquisition_function(self, configurations: pd.DataFrame, context: Optional[pd.DataFrame] = None) -> npt.NDArray: 

307 if context is not None: 

308 raise NotImplementedError() 

309 if self._space_adapter: 

310 raise NotImplementedError() 

311 

312 # pylint: disable=protected-access 

313 if self.base_optimizer._config_selector._acquisition_function is None: 

314 raise RuntimeError('Acquisition function is not yet initialized') 

315 

316 configs: list = self._to_configspace_configs(configurations) 

317 return self.base_optimizer._config_selector._acquisition_function(configs).reshape(-1,) 

318 

319 def cleanup(self) -> None: 

320 if self._temp_output_directory is not None: 

321 self._temp_output_directory.cleanup() 

322 self._temp_output_directory = None 

323 

324 def _to_configspace_configs(self, configurations: pd.DataFrame) -> List[ConfigSpace.Configuration]: 

325 """Convert a dataframe of configurations to a list of ConfigSpace configurations. 

326 

327 Parameters 

328 ---------- 

329 configurations : pd.DataFrame 

330 Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations. 

331 

332 Returns 

333 ------- 

334 configurations : list 

335 List of ConfigSpace configurations. 

336 """ 

337 return [ 

338 ConfigSpace.Configuration(self.optimizer_parameter_space, values=config.to_dict()) 

339 for (_, config) in configurations.astype('O').iterrows() 

340 ]