Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 97%
175 statements
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Tests for the mlos_core optimizers.
"""
from copy import deepcopy
from typing import List, Optional, Type

import logging
import pytest

import pandas as pd
import numpy as np
import numpy.typing as npt
import ConfigSpace as CS

from mlos_core.optimizers import (
    OptimizerType, ConcreteOptimizer, OptimizerFactory, BaseOptimizer)

from mlos_core.optimizers.bayesian_optimizers import BaseBayesianOptimizer, SmacOptimizer
from mlos_core.spaces.adapters import SpaceAdapterType

from mlos_core.tests import get_all_concrete_subclasses, SEED


_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)


@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
    *[(member.value, {}) for member in OptimizerType],
])
def test_create_optimizer_and_suggest(configuration_space: CS.ConfigurationSpace,
                                      optimizer_class: Type[BaseOptimizer], kwargs: Optional[dict]) -> None:
    """
    Test that we can create an optimizer and get a suggestion from it.
    """
    if kwargs is None:
        kwargs = {}
    optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    myrepr = repr(optimizer)
    assert myrepr.startswith(optimizer_class.__name__)

    # register_pending() is not implemented yet
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(suggestion)


@pytest.mark.parametrize(('optimizer_class', 'kwargs'), [
    *[(member.value, {}) for member in OptimizerType],
])
def test_basic_interface_toy_problem(configuration_space: CS.ConfigurationSpace,
                                     optimizer_class: Type[BaseOptimizer], kwargs: Optional[dict]) -> None:
    """
    Toy problem to test the optimizers.
    """
    max_iterations = 20
    if kwargs is None:
        kwargs = {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC sets the number of initial random samples as a percentage of max_trials, which defaults to 100.
        # With those defaults the first 25 trials would all be random and swallow our whole iteration budget,
        # so we lower max_trials to shorten the random init phase.
        kwargs['max_trials'] = max_iterations * 2
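        # (In SMAC3 that percentage comes from the initial design's max_ratio, which defaults
        # to 0.25, so max_trials = 40 should cap the random init at roughly 10 configs.)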

    def objective(x: pd.Series) -> npt.ArrayLike:  # pylint: disable=invalid-name
        ret: npt.ArrayLike = (6 * x - 2)**2 * np.sin(12 * x - 4)
        return ret
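    # The objective above is the classic 1-D Forrester benchmark, (6x - 2)^2 * sin(12x - 4);
    # its global minimum is roughly -6.02 near x ≈ 0.757, which is what the `score < -5`
    # assertion below relies on.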
    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(parameter_space=configuration_space, **kwargs)

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observation()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert (suggestion.columns == ['x', 'y', 'z']).all()
        # Check that the suggestion is within the configuration space.
        configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
        # Raises an error if outside of configuration space.
        configuration.is_valid_configuration()
        observation = objective(suggestion['x'])
        assert isinstance(observation, pd.Series)
        optimizer.register(suggestion, observation)

    best_observation = optimizer.get_best_observation()
    assert isinstance(best_observation, pd.DataFrame)
    assert (best_observation.columns == ['x', 'y', 'z', 'score']).all()
    assert best_observation['score'].iloc[0] < -5

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, pd.DataFrame)
    assert all_observations.shape == (max_iterations, 4)
    assert (all_observations.columns == ['x', 'y', 'z', 'score']).all()

    # It would be better to put this into bayesian_optimizer_test, but then we'd have to refit the model.
    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = optimizer.surrogate_predict(best_observation[['x', 'y', 'z']])
        assert pred_best.shape == (1,)

        pred_all = optimizer.surrogate_predict(all_observations[['x', 'y', 'z']])
        assert pred_all.shape == (max_iterations,)


@pytest.mark.parametrize(('optimizer_type'), [
    # Enumerate all supported Optimizers
    *list(OptimizerType),
])
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """
    Test that all optimizer types are listed in the ConcreteOptimizer constraints.
    """
    assert optimizer_type.value in ConcreteOptimizer.__constraints__  # type: ignore[attr-defined] # pylint: disable=no-member
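    # Note: ConcreteOptimizer is a constrained TypeVar, so the permitted optimizer classes
    # are exposed via its __constraints__ tuple (hence the type-ignore above).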


@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
    # Default optimizer
    (None, {}),
    # Enumerate all supported Optimizers
    *[(member, {}) for member in OptimizerType],
])
def test_create_optimizer_with_factory_method(configuration_space: CS.ConfigurationSpace,
                                              optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None:
    """
    Test that we can create an optimizer via the factory method.
    """
    if kwargs is None:
        kwargs = {}
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    if optimizer_type is not None:
        myrepr = repr(optimizer)
        assert myrepr.startswith(optimizer_type.value.__name__)


@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
    # Enumerate all supported Optimizers
    *[(member, {}) for member in OptimizerType],
    # Optimizer with non-empty kwargs argument
    (OptimizerType.SMAC, {
        # Test with default config.
        'use_default_config': True,
        # 'n_random_init': 10,
    }),
])
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None:
    """
    Toy problem to test the optimizers with the LlamaTune space adapter.
    """
    # pylint: disable=too-complex
    # pylint: disable=too-many-statements
    # pylint: disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.Series:
        # The best value is reachable by tuning a 1-dimensional search space.
        ret: pd.Series = np.sin(point['x'] * point['y'])
        assert ret.hasnans is False
        return ret

    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='x', lower=0, upper=3))
    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0, upper=3))

    # Initialize an optimizer that uses the LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }
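    # With num_low_dims=1, LlamaTune projects the 2-dimensional input space down to a single
    # latent dimension; the in-loop assertion below checks that its suggestions do in fact
    # lie on a line in the original space.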

    # Make some adjustments to the kwargs for the plain optimizer and the
    # LlamaTune-wrapped optimizer for debugging/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #     # Allow us to override the number of random init samples.
    #     kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #     optimizer_kwargs['n_random_init'] = 20
    #     llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get('n_random_init', 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # A place to set a breakpoint for when the optimizers are done with random init.
        if llamatune_n_random_init and i > llamatune_n_random_init:
            _LOG.debug("LlamaTuned optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # Loop for the plain optimizer.
        suggestion = optimizer.suggest()
        observation = objective(suggestion)
        optimizer.register(suggestion, observation)

        # Loop for the LlamaTune-wrapped optimizer.
        suggestion = llamatune_optimizer.suggest()
        _x, _y = suggestion['x'].iloc[0], suggestion['y'].iloc[0]
        # The optimizer explores a 1-dimensional subspace.
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3., rel=1e-3)
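        # (The single latent dimension maps to a line in the original 2-D space; which of the
        # two lines, x == y or x + y == 3, presumably depends on the signs drawn for the random
        # projection, hence the disjunction above.)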
        observation = objective(suggestion)
        llamatune_optimizer.register(suggestion, observation)

    # Retrieve the best observations.
    best_observation = optimizer.get_best_observation()
    llamatune_best_observation = llamatune_optimizer.get_best_observation()

    for best_obv in (best_observation, llamatune_best_observation):
        assert isinstance(best_obv, pd.DataFrame)
        assert (best_obv.columns == ['x', 'y', 'score']).all()

    # LlamaTune's optimizer score should be better (i.e., lower) than, or very close to,
    # the plain optimizer's.
    assert best_observation['score'].iloc[0] > llamatune_best_observation['score'].iloc[0] or \
        best_observation['score'].iloc[0] + 1e-3 > llamatune_best_observation['score'].iloc[0]

    # Retrieve and check all observations.
    for all_obvs in (optimizer.get_observations(), llamatune_optimizer.get_observations()):
        assert isinstance(all_obvs, pd.DataFrame)
        assert all_obvs.shape == (num_iters, 3)
        assert (all_obvs.columns == ['x', 'y', 'score']).all()

    # The .surrogate_predict() method is not currently implemented when a space adapter is employed.
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            llamatune_optimizer.surrogate_predict(llamatune_best_observation[['x', 'y']])


# Dynamically determine all of the optimizers we have implemented.
# Note: these must be sorted.
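# (presumably so that the parametrized test IDs below come out in a stable order
# across runs and workers)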
optimizer_subclasses: List[Type[BaseOptimizer]] = get_all_concrete_subclasses(BaseOptimizer,  # type: ignore[type-abstract]
                                                                              pkg_name='mlos_core')
assert optimizer_subclasses


@pytest.mark.parametrize(('optimizer_class'), optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None:
    """
    Test that all optimizer classes are listed in the OptimizerType enum.
    """
    optimizer_type_classes = {member.value for member in OptimizerType}
    assert optimizer_class in optimizer_type_classes


@pytest.mark.parametrize(('optimizer_type', 'kwargs'), [
    # Default optimizer
    (None, {}),
    # Enumerate all supported Optimizers
    *[(member, {}) for member in OptimizerType],
])
def test_mixed_numerics_type_input_space_types(optimizer_type: Optional[OptimizerType], kwargs: Optional[dict]) -> None:
    """
    Toy problem to test the optimizers with mixed numeric types to ensure that the original dtypes are retained.
    """
    max_iterations = 10
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.Series:
        # With this mix of hyperparameters the optimum is to select the highest possible values.
        ret: pd.Series = point["x"] + point["y"]
        return ret

    input_space = CS.ConfigurationSpace(seed=SEED)
    # Add a mix of numeric datatypes.
    input_space.add_hyperparameter(CS.UniformIntegerHyperparameter(name='x', lower=0, upper=5))
    input_space.add_hyperparameter(CS.UniformFloatHyperparameter(name='y', lower=0.0, upper=5.0))

    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observation()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert (suggestion.columns == ['x', 'y']).all()
        # Check that suggestion values have the expected dtypes.
        assert isinstance(suggestion['x'].iloc[0], np.integer)
        assert isinstance(suggestion['y'].iloc[0], np.floating)
        # Check that the suggestion is within the configuration space.
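        # The astype('O') below converts the row to object dtype, presumably so that
        # ConfigSpace's validation sees plain Python values rather than numpy scalars.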
        test_configuration = CS.Configuration(optimizer.parameter_space, suggestion.astype('O').iloc[0].to_dict())
        # Raises an error if outside of configuration space.
        test_configuration.is_valid_configuration()
        # Test registering the suggested configuration with a score.
        observation = objective(suggestion)
        assert isinstance(observation, pd.Series)
        optimizer.register(suggestion, observation)

    best_observation = optimizer.get_best_observation()
    assert isinstance(best_observation, pd.DataFrame)

    all_observations = optimizer.get_observations()
    assert isinstance(all_observations, pd.DataFrame)