Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98%

108 statements  

coverage.py v7.6.7, created at 2024-11-22 01:18 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""A wrapper for mlos_core optimizers for mlos_bench."""

import logging
import os
from types import TracebackType
from typing import Dict, Literal, Optional, Sequence, Tuple, Type, Union

import pandas as pd

from mlos_bench.environments.status import Status
from mlos_bench.optimizers.base_optimizer import Optimizer
from mlos_bench.optimizers.convert_configspace import (
    TunableValueKind,
    configspace_data_to_tunable_values,
    special_param_names,
)
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_core.optimizers import (
    DEFAULT_OPTIMIZER_TYPE,
    BaseOptimizer,
    OptimizerFactory,
    OptimizerType,
    SpaceAdapterType,
)

_LOG = logging.getLogger(__name__)
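
# An illustrative sketch (not from the original file; values hypothetical) of
# the kind of optimizer config dict that ``MlosCoreOptimizer.__init__`` below
# consumes. The keys shown are the ones read or popped in ``__init__``; any
# remaining entries are forwarded to the underlying mlos_core optimizer as
# keyword arguments:
#
#     {
#         "optimizer_type": "SMAC",             # resolved via getattr(OptimizerType, ...)
#         "output_directory": "./smac_output",  # made absolute for SMAC
#         "max_trials": 100,                    # must be >= max_suggestions
#         "run_name": "my_experiment",          # defaults to the experiment_id
#     }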

class MlosCoreOptimizer(Optimizer):
    """A wrapper class for the mlos_core optimizers."""

    def __init__(
        self,
        tunables: TunableGroups,
        config: dict,
        global_config: Optional[dict] = None,
        service: Optional[Service] = None,
    ):
        super().__init__(tunables, config, global_config, service)

        opt_type = getattr(
            OptimizerType, self._config.pop("optimizer_type", DEFAULT_OPTIMIZER_TYPE.name)
        )

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get("output_directory")
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config["output_directory"] = os.path.abspath(output_directory)
            else:
                _LOG.warning(
                    "SMAC optimizer output_directory was null. "
                    "SMAC will use a temporary directory."
                )

            # Make sure max_trials >= max_suggestions.
            if "max_trials" not in self._config:
                self._config["max_trials"] = self._max_suggestions
            assert int(self._config["max_trials"]) >= self._max_suggestions, (
                f"max_trials {self._config.get('max_trials')} "
                f"< max_suggestions {self._max_suggestions}"
            )

            if "run_name" not in self._config and self.experiment_id:
                self._config["run_name"] = self.experiment_id

        space_adapter_type = self._config.pop("space_adapter_type", None)
        space_adapter_config = self._config.pop("space_adapter_config", {})

        if space_adapter_type is not None:
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)

        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimization_targets=list(self._opt_targets),
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )
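
    # An illustrative sketch (not from the original file): "space_adapter_type"
    # is resolved by name against mlos_core's SpaceAdapterType enum, so a config
    # such as (values hypothetical):
    #
    #     "space_adapter_type": "LLAMATUNE",
    #     "space_adapter_config": {"num_low_dims": 2},
    #
    # would wrap the optimizer in a LlamaTune low-dimensional space adapter.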

    def __exit__(
        self,
        ex_type: Optional[Type[BaseException]],
        ex_val: Optional[BaseException],
        ex_tb: Optional[TracebackType],
    ) -> Literal[False]:
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"

    def bulk_register(
        self,
        configs: Sequence[dict],
        scores: Sequence[Optional[Dict[str, TunableValue]]],
        status: Optional[Sequence[Status]] = None,
    ) -> bool:

        if not super().bulk_register(configs, scores, status):
            return False

        df_configs = self._to_df(configs)  # Impute missing values, if necessary

        df_scores = self._adjust_signs_df(
            pd.DataFrame([{} if score is None else score for score in scores])
        )

        if status is not None:
            # Select only the completed trials, set scores for failed trials to +inf.
            df_status = pd.Series(status)
            # TODO: Be more flexible with values used for failed trials (not just +inf).
            # Issue: https://github.com/microsoft/MLOS/issues/523
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]

        # TODO: Specify (in the config) which metrics to pass to the optimizer.
        # Issue: https://github.com/microsoft/MLOS/issues/745
        self._opt.register(configs=df_configs, scores=df_scores)

        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up END: %s :: %s", self, score)

        return True
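
    # An illustrative usage sketch (not from the original file; data
    # hypothetical): warm-starting from three past trials. The FAILED trial's
    # scores are overwritten with +inf and any non-completed trials are dropped
    # before registration:
    #
    #     opt.bulk_register(
    #         configs=[{"vm_size": 2}, {"vm_size": 4}, {"vm_size": 8}],
    #         scores=[{"score": 0.9}, None, {"score": 0.7}],
    #         status=[Status.SUCCEEDED, Status.FAILED, Status.SUCCEEDED],
    #     )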

    def _adjust_signs_df(self, df_scores: pd.DataFrame) -> pd.DataFrame:
        """Coerce optimization target scores to floats and adjust their signs for a
        MINIMIZATION problem.
        """
        df_targets = df_scores[list(self._opt_targets)]
        try:
            return df_targets.astype(float) * self._opt_targets.values()
        except ValueError as ex:
            _LOG.error(
                "Some score values cannot be converted to float - check the data ::\n%s",
                df_targets,
                exc_info=True,
            )
            raise ValueError("Some score values cannot be converted to float") from ex
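
    # An illustrative sketch (not from the original file), assuming the base
    # class's usual convention of mapping each target to a sign multiplier
    # (+1 == minimize, -1 == maximize): with _opt_targets
    # {"score": 1, "throughput": -1}, a raw row {"score": 0.9, "throughput": 250}
    # becomes {"score": 0.9, "throughput": -250.0}, so the underlying mlos_core
    # optimizer can always minimize.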

    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and impute
        default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
        """
        df_configs = pd.DataFrame(configs)
        tunables_names = list(self._tunables.get_param_values().keys())
        missing_cols = set(tunables_names).difference(df_configs.columns)
        for tunable, _group in self._tunables:
            if tunable.name in missing_cols:
                df_configs[tunable.name] = tunable.default
            else:
                df_configs.fillna({tunable.name: tunable.default}, inplace=True)
                # External data can have incorrect types (e.g., all strings).
                df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
            # Add columns for tunables with special values.
            if tunable.special:
                (special_name, type_name) = special_param_names(tunable.name)
                tunables_names += [special_name, type_name]
                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
                df_configs[type_name] = TunableValueKind.RANGE
                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
                if tunable.type == "int":
                    # Make int column NULLABLE:
                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
                df_configs[special_name] = df_configs[tunable.name]
                df_configs.loc[~is_special, special_name] = None
                df_configs.loc[is_special, tunable.name] = None
        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs
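
    # An illustrative sketch (not from the original file; tunable and derived
    # column names hypothetical): given an int tunable "vm_size" with default 2
    # and special values [-1], a row {"vm_size": -1} splits into a nullable-int
    # range column plus the two extra columns from special_param_names(),
    # roughly:
    #
    #     vm_size   vm_size!special   vm_size!type
    #     <NA>      -1                "special"
    #
    # while a row missing "vm_size" entirely gets the default 2 imputed.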

    def suggest(self) -> TunableGroups:
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        df_config, _metadata = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
        return tunables.assign(configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

    def register(
        self,
        tunables: TunableGroups,
        status: Status,
        score: Optional[Dict[str, TunableValue]] = None,
    ) -> Optional[Dict[str, float]]:
        registered_score = super().register(
            tunables,
            status,
            score,
        )  # Sign-adjusted for MINIMIZATION
        if status.is_completed():
            assert registered_score is not None
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", registered_score, df_config)
            # TODO: Specify (in the config) which metrics to pass to the optimizer.
            # Issue: https://github.com/microsoft/MLOS/issues/745
            self._opt.register(
                configs=df_config,
                scores=pd.DataFrame([registered_score], dtype=float),
            )
        return registered_score
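
    # An illustrative usage sketch (not from the original file; run_trial is a
    # hypothetical helper, and not_converged() is assumed from the Optimizer
    # base class): the scheduler-side loop pairs suggest() with register(),
    # using the context manager so _opt.cleanup() runs on exit:
    #
    #     with MlosCoreOptimizer(tunables, config) as opt:
    #         while opt.not_converged():
    #             suggestion = opt.suggest()
    #             status, score = run_trial(suggestion)
    #             opt.register(suggestion, status, score)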

    def get_best_observation(
        self,
    ) -> Union[Tuple[Dict[str, float], TunableGroups], Tuple[None, None]]:
        (df_config, df_score, _df_context) = self._opt.get_best_observations()
        if len(df_config) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
        scores = self._adjust_signs_df(df_score).iloc[0].to_dict()
        _LOG.debug("Best observation: %s score: %s", params, scores)
        return (scores, self._tunables.copy().assign(params))
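
    # An illustrative sketch (not from the original file; values hypothetical):
    # after some completed trials, get_best_observation() might return
    #     ({"score": 0.82}, <TunableGroups with the best params assigned>)
    # whereas with no observations it returns (None, None).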