Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 97%

175 statements  

coverage.py v7.5.1, created at 2024-05-05 00:36 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Tests for Bayesian Optimizers.
"""

from copy import deepcopy
from typing import List, Optional, Type

import logging
import pytest

import pandas as pd
import numpy as np
import numpy.typing as npt
import ConfigSpace as CS

from mlos_core.optimizers import (
    OptimizerType, ConcreteOptimizer, OptimizerFactory, BaseOptimizer)

from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer, SmacOptimizer
from mlos_core.spaces.adapters import SpaceAdapterType

from mlos_core.tests import get_all_concrete_subclasses, SEED


_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)


@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
    *[(member.value, {}) for member in OptimizerType],
])
def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace,
                                      optimizer_class: Type[BaseOptimizer], kwargs: Optional[dict]) -> None:
    """
    Test that we can create an optimizer and get a suggestion from it.
    """
    if kwargs is None:
        kwargs = {}
    optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    myrepr = repr(optimizer)
    assert myrepr.startswith(optimizer_class.__name__)

    # pending not implemented
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(suggestion)


@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
    *[(member.value, {}) for member in OptimizerType],
])
def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace,
                                     optimizer_class: Type[BaseOptimizer], kwargs: Optional[dict]) -> None:
    """
    Toy problem to test the optimizers.
    """
    max_iterations = 20
    if kwargs is None:
        kwargs = {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC sets the initial random samples as a percentage of the max iterations, which defaults to 100.
        # To avoid having to train more than 25 model iterations, we set a lower number of max iterations.
        kwargs['max_trials'] = max_iterations * 2

    def objective(x: pd.Series) -> npt.ArrayLike:   # pylint: disable=invalid-name
        ret: npt.ArrayLike = (6 * x - 2)**2 * np.sin(12 * x - 4)
        return ret
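    # Note: (6 * x - 2)**2 * sin(12 * x - 4) is the classic one-dimensional Forrester
    # test function; its global minimum is roughly -6.02 near x ~= 0.757, which is
    # presumably why the test below expects the best observed score to fall under -5.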

    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observation()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert (suggestion.columns == ['x', 'y', 'z']).all()
        # check that suggestion is in the space
        configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
        # Raises an error if outside of configuration space
        configuration.is_valid_configuration()
        observation = objective(suggestion['x'])
        assert isinstance(observation, pd.Series)
        optimizer.register(suggestion, observation)

    best_observation = optimizer.get_best_observation()
    assert isinstance(best_observation, pd.DataFrame)
    assert (best_observation.columns == ['x', 'y', 'z', 'score']).all()
    assert best_observation['score'].iloc[0] < -5

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, pd.DataFrame)
    assert all_observations.shape == (20, 4)
    assert (all_observations.columns == ['x', 'y', 'z', 'score']).all()

    # It would be better to put this into bayesian_optimizer_test but then we'd have to refit the model
    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = optimizer.surrogate_predict(best_observation[['x', 'y', 'z']])
        assert pred_best.shape == (1,)

        pred_all = optimizer.surrogate_predict(all_observations[['x', 'y', 'z']])
        assert pred_all.shape == (20,)
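        # surrogate_predict() returns one scalar prediction per input row, hence the
        # (1,) shape for the single best observation and (20,) for all 20 observations.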

@pytest.mark.parametrize(('optimizer_type'), [
    # Enumerate all supported Optimizers
    # *[member for member in OptimizerType],
    *list(OptimizerType),
])
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """
    Test that all optimizer types are listed in the ConcreteOptimizer constraints.
    """
    assert optimizer_type.value in ConcreteOptimizer.__constraints__    # type: ignore[attr-defined]  # pylint: disable=no-member
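    # ConcreteOptimizer is (presumably) a constrained TypeVar, so its __constraints__
    # tuple should list every concrete optimizer class referenced by OptimizerType.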

@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
    # Default optimizer
    (None, {}),
    # Enumerate all supported Optimizers
    *[(member, {}) for member in OptimizerType],
    # Optimizer with non-empty kwargs argument
])
def test_create_optimizer_with_factory_method(configuration_space: CS.ConfigurationSpace,
                                              optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None:
    """
    Test that we can create an optimizer via a factory.
    """
    if kwargs is None:
        kwargs = {}
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    if optimizer_type is not None:
        myrepr = repr(optimizer)
        assert myrepr.startswith(optimizer_type.value.__name__)


@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
    # Enumerate all supported Optimizers
    *[(member, {}) for member in OptimizerType],
    # Optimizer with non-empty kwargs argument
    (OptimizerType.SMAC, {
        # Test with default config.
        'use_default_config': True,
        # 'n_random_init': 10,
    }),
])
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None:
    """
    Toy problem to test the optimizers with the llamatune space adapter.
    """
    # pylint: disable=too-complex
    # pylint: disable=too-many-statements
    # pylint: disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.Series:
        # The best value can be reached by tuning a 1-dimensional search space.
        ret: pd.Series = np.sin(point['x'] * point['y'])
        assert ret.hasnans is False
        return ret

    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='x', lower=0, upper=3))
    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0, upper=3))

    # Initialize an optimizer that uses LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }
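    # With num_low_dims=1, the LlamaTune adapter should project the 2-d (x, y) space
    # down to a single latent dimension; the per-iteration assertion in the loop below
    # checks that its suggestions therefore fall on a line in the original space.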

    # Make some adjustments to the kwargs for the optimizer and LlamaTuned
    # optimizer for debug/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #     # Allow us to override the number of random init samples.
    #     kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #     optimizer_kwargs['n_random_init'] = 20
    #     llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get('n_random_init', 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # Place to set a breakpoint for when the optimizer is done with random init.
        if llamatune_n_random_init and i > llamatune_n_random_init:
            _LOG.debug("LlamaTuned Optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # loop for optimizer
        suggestion = optimizer.suggest()
        observation = objective(suggestion)
        optimizer.register(suggestion, observation)

        # loop for llamatune-optimizer
        suggestion = llamatune_optimizer.suggest()
        _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0]
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3)    # optimizer explores 1-dimensional space
        observation = objective(suggestion)
        llamatune_optimizer.register(suggestion, observation)

    # Retrieve best observations
    best_observation = optimizer.get_best_observation()
    llamatune_best_observation = llamatune_optimizer.get_best_observation()

    for best_obv in (best_observation, llamatune_best_observation):
        assert isinstance(best_obv, pd.DataFrame)
        assert (best_obv.columns == ['x', 'y', 'score']).all()

    # LlamaTune's optimizer score should be better (i.e., lower) than the plain optimizer's, or close to it.
    assert best_observation['score'].iloc[0] > llamatune_best_observation['score'].iloc[0] or \
        best_observation['score'].iloc[0] + 1e-3 > llamatune_best_observation['score'].iloc[0]

    # Retrieve and check all observations
    for all_obvs in (optimizer.get_observations(), llamatune_optimizer.get_observations()):
        assert isinstance(all_obvs, pd.DataFrame)
        assert all_obvs.shape == (num_iters, 3)
        assert (all_obvs.columns == ['x', 'y', 'score']).all()

    # The .surrogate_predict() method is not currently implemented when a space adapter is employed.
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            llamatune_optimizer.surrogate_predict(llamatune_best_observation[['x', 'y']])


# Dynamically determine all of the optimizers we have implemented.
# Note: these must be sorted.
optimizer_subclasses: List[Type[BaseOptimizer]] = get_all_concrete_subclasses(BaseOptimizer,    # type: ignore[type-abstract]
                                                                              pkg_name='mlos_core')
assert optimizer_subclasses
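# Note: get_all_concrete_subclasses presumably scans the given package for all
# non-abstract BaseOptimizer subclasses (sorted, per the note above), so the
# parametrized test below automatically covers newly added optimizers.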

@pytest.mark.parametrize(('optimizer_class'), optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None:
    """
    Test that all optimizer classes are listed in the OptimizerType enum.
    """
    optimizer_type_classes = {member.value for member in OptimizerType}
    assert optimizer_class in optimizer_type_classes


@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
    # Default optimizer
    (None, {}),
    # Enumerate all supported Optimizers
    *[(member, {}) for member in OptimizerType],
    # Optimizer with non-empty kwargs argument
])
def test_mixed_numerics_type_input_space_types(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None:
    """
    Toy problem to test the optimizers with mixed numeric types to ensure that original dtypes are retained.
    """
    max_iterations = 10
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.Series:
        # mix of hyperparameters, optimal is to select the highest possible
        ret: pd.Series = point["x"] + point["y"]
        return ret

    input_space = CS.ConfigurationSpace(seed=SEED)
    # add a mix of numeric datatypes
    input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5))
    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0))

    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observation()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert (suggestion.columns == ['x', 'y']).all()
        # Check suggestion values are the expected dtype
        assert isinstance(suggestion['x'].iloc[0], np.integer)
        assert isinstance(suggestion['y'].iloc[0], np.floating)
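        # Note: the astype('O') below is likely needed because .iloc[0] on a mixed
        # int/float DataFrame would otherwise upcast the whole row to a common float
        # dtype, while CS.Configuration expects the integer 'x' value to stay integral.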

        # Check that suggestion is in the space
        test_configuration = CS.Configuration(optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict())
        # Raises an error if outside of configuration space
        test_configuration.is_valid_configuration()
        # Test registering the suggested configuration with a score.
        observation = objective(suggestion)
        assert isinstance(observation, pd.Series)
        optimizer.register(suggestion, observation)

    best_observation = optimizer.get_best_observation()
    assert isinstance(best_observation, pd.DataFrame)

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, pd.DataFrame)