Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 98%

207 statements  

coverage.py v7.6.9, created at 2024-12-20 00:44 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Tests for Bayesian Optimizers."""

import logging
from copy import deepcopy
from typing import Any, List, Optional, Type

import ConfigSpace as CS
import numpy as np
import pandas as pd
import pytest

from mlos_core.data_classes import Observations, Suggestion
from mlos_core.optimizers import (
    BaseOptimizer,
    ConcreteOptimizer,
    OptimizerFactory,
    OptimizerType,
)
from mlos_core.optimizers.bayesian_optimizers import (
    BaseBayesianOptimizer,
    SmacOptimizer,
)
from mlos_core.spaces.adapters import SpaceAdapterType
from mlos_core.tests import SEED, get_all_concrete_subclasses

_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)


@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_create_optimizer_and_suggest(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Test that we can create an optimizer and get a suggestion from it."""
    if kwargs is None:
        kwargs = {}
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    myrepr = repr(optimizer)
    assert myrepr.startswith(optimizer_class.__name__)

    # pending not implemented
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(pending=suggestion)


@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_basic_interface_toy_problem(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Toy problem to test the optimizers."""
    # pylint: disable=too-many-locals
    max_iterations = 20
    if kwargs is None:
        kwargs = {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC sets the number of initial random samples as a percentage of the
        # maximum number of trials, which defaults to 100.
        # To avoid having to train more than 25 model iterations, we set a lower
        # number of max trials.
        kwargs["max_trials"] = max_iterations * 2
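        # (Assumption for context: "max_trials" is forwarded to SMAC's overall trial
        # budget, which in turn scales down how many of the early trials are random
        # samples before the surrogate model is first fit.)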

    def objective(inp: float) -> pd.Series:
        series: pd.Series = pd.Series(
            {"score": (6 * inp - 2) ** 2 * np.sin(12 * inp - 4)}
        )  # needed for type hinting
        return series
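
    # (Note, for context: this objective is the Forrester et al. (2008) test function,
    # (6x - 2)^2 * sin(12x - 4); on [0, 1] its global minimum is roughly -6.02 near
    # x ~= 0.76, which is why the "score < -4" check on the best observation below is
    # a reasonable bar for a working optimizer.)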

    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        assert isinstance(suggestion.config, pd.Series)
        assert suggestion.metadata is None or isinstance(suggestion.metadata, pd.Series)
        assert set(suggestion.config.index) == {"x", "y", "z"}
        # check that suggestion is in the space
        dict_config: dict = suggestion.config.to_dict()
        configuration = CS.Configuration(optimizer.parameter_space, dict_config)
        # Raises an error if outside of configuration space
        configuration.check_valid_configuration()
        inp: Any = suggestion.config["x"]
        assert isinstance(inp, (int, float))
        observation = objective(inp)
        assert isinstance(observation, pd.Series)
        optimizer.register(observations=suggestion.complete(observation))

    best_observation = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    assert isinstance(best_observation.configs, pd.DataFrame)
    assert isinstance(best_observation.scores, pd.DataFrame)
    assert best_observation.contexts is None
    assert set(best_observation.configs.columns) == {"x", "y", "z"}
    assert set(best_observation.scores.columns) == {"score"}
    assert best_observation.configs.shape == (1, 3)
    assert best_observation.scores.shape == (1, 1)
    assert best_observation.scores.score.iloc[0] < -4

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, Observations)
    assert isinstance(all_observations.configs, pd.DataFrame)
    assert isinstance(all_observations.scores, pd.DataFrame)
    assert all_observations.contexts is None
    assert set(all_observations.configs.columns) == {"x", "y", "z"}
    assert set(all_observations.scores.columns) == {"score"}
    assert all_observations.configs.shape == (20, 3)
    assert all_observations.scores.shape == (20, 1)

    # It would be better to put this into bayesian_optimizer_test but then we'd have
    # to refit the model
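    # (Assumption for context: surrogate_predict() is expected to return the surrogate
    # model's prediction for a single suggestion, so the list comprehensions below
    # produce one prediction per observation.)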

    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = [
            optimizer.surrogate_predict(suggestion=observation.to_suggestion())
            for observation in best_observation
        ]
        assert len(pred_best) == 1

        pred_all = [
            optimizer.surrogate_predict(suggestion=observation.to_suggestion())
            for observation in all_observations
        ]
        assert len(pred_all) == 20


@pytest.mark.parametrize(
    ("optimizer_type"),
    [
        # Enumerate all supported Optimizers
        # *[member for member in OptimizerType],
        *list(OptimizerType),
    ],
)
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """Test that all optimizer types are listed in the ConcreteOptimizer constraints."""
    # pylint: disable=no-member
    assert optimizer_type.value in ConcreteOptimizer.__constraints__
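    # (Assumption: ConcreteOptimizer is a typing.TypeVar constrained to the concrete
    # optimizer classes, so its __constraints__ tuple should enumerate exactly the
    # classes reachable via OptimizerType member values.)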


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_create_optimizer_with_factory_method(
    configuration_space: CS.ConfigurationSpace,
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Test that we can create an optimizer via a factory."""
    if kwargs is None:
        kwargs = {}
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    if optimizer_type is not None:
        myrepr = repr(optimizer)
        assert myrepr.startswith(optimizer_type.value.__name__)


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
        (
            OptimizerType.SMAC,
            {
                # Test with default config.
                "use_default_config": True,
                # 'n_random_init': 10,
            },
        ),
    ],
)
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None:
    """Toy problem to test the optimizers with llamatune space adapter."""
    # pylint: disable=too-complex,disable=too-many-statements,disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.Series) -> pd.Series:
        # Best value can be reached by tuning a 1-dimensional search space
        ret: pd.Series = pd.Series({"score": np.sin(point.x * point.y)})
        assert pd.notna(ret.score)
        return ret

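    # (For reference: score = sin(x * y) attains its minimum of -1 whenever
    # x * y = 3*pi/2, about 4.71, which is reachable within the [0, 3] x [0, 3]
    # input space defined below.)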

    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add(CS.UniformFloatHyperparameter(name="x", lower=0, upper=3))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0, upper=3))

    # Initialize an optimizer that uses LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }

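    # (Context, per the assertions further below: with num_low_dims=1 the LlamaTune
    # space adapter searches a 1-dimensional projection of the 2-dimensional (x, y)
    # space, so the suggested x and y values are expected to be linearly related.)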

    # Make some adjustments to the kwargs for the optimizer and LlamaTuned
    # optimizer for debug/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #     # Allow us to override the number of random init samples.
    #     kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #     optimizer_kwargs['n_random_init'] = 20
    #     llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get("n_random_init", 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # Place to set a breakpoint for when the optimizer is done with random init.
        if llamatune_n_random_init and i > llamatune_n_random_init:
            _LOG.debug("LlamaTuned Optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # loop for optimizer
        suggestion = optimizer.suggest()
        observation = objective(suggestion.config)
        optimizer.register(observations=suggestion.complete(observation))

        # loop for llamatune-optimizer
        suggestion = llamatune_optimizer.suggest()
        _x, _y = suggestion.config["x"], suggestion.config["y"]
        # optimizer explores 1-dimensional space
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3.0, rel=1e-3)
        observation = objective(suggestion.config)
        llamatune_optimizer.register(observations=suggestion.complete(observation))

    # Retrieve best observations
    best_observation: Observations = optimizer.get_best_observations()
    assert isinstance(best_observation, Observations)
    llamatune_best_observations: Observations = llamatune_optimizer.get_best_observations()
    assert isinstance(llamatune_best_observations, Observations)

    for observations in (best_observation, llamatune_best_observations):
        assert isinstance(observations.configs, pd.DataFrame)
        assert isinstance(observations.scores, pd.DataFrame)
        assert observations.contexts is None
        assert set(observations.configs.columns) == {"x", "y"}
        assert set(observations.scores.columns) == {"score"}

    # LlamaTune's optimizer score should be better (i.e., lower) than the plain
    # optimizer's, or at least close to it.
    assert (
        best_observation.scores.score.iloc[0] > llamatune_best_observations.scores.score.iloc[0]
        or best_observation.scores.score.iloc[0] + 1e-3
        > llamatune_best_observations.scores.score.iloc[0]
    )

    # Retrieve and check all observations
    for all_observations in (
        optimizer.get_observations(),
        llamatune_optimizer.get_observations(),
    ):
        assert isinstance(all_observations.configs, pd.DataFrame)
        assert isinstance(all_observations.scores, pd.DataFrame)
        assert all_observations.contexts is None
        assert set(all_observations.configs.columns) == {"x", "y"}
        assert set(all_observations.scores.columns) == {"score"}
        assert len(all_observations.configs) == num_iters
        assert len(all_observations.scores) == num_iters
        assert len(all_observations) == num_iters

    # .surrogate_predict method not currently implemented if space adapter is employed
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            for obs in llamatune_best_observations:
                llamatune_optimizer.surrogate_predict(suggestion=obs.to_suggestion())


# Dynamically determine all of the optimizers we have implemented.
# Note: these must be sorted.
optimizer_subclasses: List[Type[BaseOptimizer]] = get_all_concrete_subclasses(
    BaseOptimizer,  # type: ignore[type-abstract]
    pkg_name="mlos_core",
)
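# (Assumption about the helper: get_all_concrete_subclasses() walks the given package
# and returns every non-abstract subclass of BaseOptimizer; the assert below guards
# against an empty result, e.g. due to an import or discovery problem.)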

assert optimizer_subclasses


@pytest.mark.parametrize(("optimizer_class"), optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None:
    """Test that all optimizer classes are listed in the OptimizerType enum."""
    optimizer_type_classes = {member.value for member in OptimizerType}
    assert optimizer_class in optimizer_type_classes


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_mixed_numerics_type_input_space_types(
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Toy problem to test the optimizers with mixed numeric types to ensure that
    original dtypes are retained.
    """
    # pylint: disable=too-many-locals
    max_iterations = 10
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.Series) -> pd.Series:
        # A mix of numeric hyperparameters; the optimum is to select the highest
        # possible values for both.
        ret: pd.Series = pd.Series({"score": point["x"] + point["y"]})
        return ret

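    # (Context: with an integer "x" in [0, 5] and a float "y" in [0.0, 5.0] defined
    # below, this test mainly exercises dtype round-tripping; the loop asserts that
    # suggested values come back as int and float respectively, rather than checking
    # convergence to the optimum.)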

    input_space = CS.ConfigurationSpace(seed=SEED)
    # add a mix of numeric datatypes
    input_space.add(CS.UniformIntegerHyperparameter(name="x", lower=0, upper=5))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0.0, upper=5.0))

    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )

    assert isinstance(optimizer, BaseOptimizer)

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, Suggestion)
        assert isinstance(suggestion.config, pd.Series)
        assert set(suggestion.config.index) == {"x", "y"}
        # Check suggestion values are the expected dtype
        assert isinstance(suggestion.config["x"], int)
        assert isinstance(suggestion.config["y"], float)
        # Check that suggestion is in the space
        test_configuration = CS.Configuration(
            optimizer.parameter_space, suggestion.config.to_dict()
        )
        # Raises an error if outside of configuration space
        test_configuration.check_valid_configuration()
        # Test registering the suggested configuration with a score.
        observation = objective(suggestion.config)
        assert isinstance(observation, pd.Series)
        optimizer.register(observations=suggestion.complete(observation))

    best_observations = optimizer.get_best_observations()
    assert isinstance(best_observations.configs, pd.DataFrame)
    assert isinstance(best_observations.scores, pd.DataFrame)
    assert best_observations.contexts is None

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations.configs, pd.DataFrame)
    assert isinstance(all_observations.scores, pd.DataFrame)
    assert all_observations.contexts is None