Coverage for mlos_core/mlos_core/tests/optimizers/optimizer_test.py: 97%
194 statements
coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Tests for Bayesian Optimizers."""

import logging
from copy import deepcopy
from typing import List, Optional, Type

import ConfigSpace as CS
import numpy as np
import pandas as pd
import pytest

from mlos_core.optimizers import (
    BaseOptimizer,
    ConcreteOptimizer,
    OptimizerFactory,
    OptimizerType,
)
from mlos_core.optimizers.bayesian_optimizers import (
    BaseBayesianOptimizer,
    SmacOptimizer,
)
from mlos_core.spaces.adapters import SpaceAdapterType
from mlos_core.tests import SEED, get_all_concrete_subclasses

_LOG = logging.getLogger(__name__)
_LOG.setLevel(logging.DEBUG)


@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_create_optimizer_and_suggest(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Test that we can create an optimizer and get a suggestion from it."""
    if kwargs is None:
        kwargs = {}
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion, metadata = optimizer.suggest()
    assert suggestion is not None

    myrepr = repr(optimizer)
    assert myrepr.startswith(optimizer_class.__name__)

    # pending not implemented
    with pytest.raises(NotImplementedError):
        optimizer.register_pending(configs=suggestion, metadata=metadata)
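
# A minimal sketch (illustrative only; not executed by this test suite) of the
# suggest/register protocol that the tests below exercise, assuming `space` is
# an existing CS.ConfigurationSpace and `objective` maps a configs DataFrame to
# a DataFrame with a "score" column:
#
#     opt = OptimizerFactory.create(
#         parameter_space=space,
#         optimization_targets=["score"],
#         optimizer_type=OptimizerType.SMAC,
#     )
#     for _ in range(10):
#         config, metadata = opt.suggest()
#         opt.register(configs=config, scores=objective(config), metadata=metadata)
#     (best_config, best_score, _context) = opt.get_best_observations()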


@pytest.mark.parametrize(
    ("optimizer_class", "kwargs"),
    [
        *[(member.value, {}) for member in OptimizerType],
    ],
)
def test_basic_interface_toy_problem(
    configuration_space: CS.ConfigurationSpace,
    optimizer_class: Type[BaseOptimizer],
    kwargs: Optional[dict],
) -> None:
    """Toy problem to test the optimizers."""
    # pylint: disable=too-many-locals
    max_iterations = 20
    if kwargs is None:
        kwargs = {}
    if optimizer_class == OptimizerType.SMAC.value:
        # SMAC draws its initial random samples as a fraction of the maximum
        # number of trials, which defaults to 100.
        # To avoid having to train more than 25 model iterations, we cap the
        # number of trials at twice this test's iteration budget.
        kwargs["max_trials"] = max_iterations * 2

    def objective(x: pd.Series) -> pd.DataFrame:
        return pd.DataFrame({"score": (6 * x - 2) ** 2 * np.sin(12 * x - 4)})
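
    # The objective above is the classic Forrester et al. (2008) 1-D test
    # function; its global minimum on the usual [0, 1] domain is roughly -6.02
    # (near x ≈ 0.757), which is what the `best_score < -5` assertion further
    # below relies on.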

    # Emukit doesn't allow specifying a random state, so we set the global seed.
    np.random.seed(SEED)
    optimizer = optimizer_class(
        parameter_space=configuration_space,
        optimization_targets=["score"],
        **kwargs,
    )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion, metadata = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert metadata is None or isinstance(metadata, pd.DataFrame)
        assert set(suggestion.columns) == {"x", "y", "z"}
        # check that suggestion is in the space
        configuration = CS.Configuration(optimizer.parameter_space, suggestion.iloc[0].to_dict())
        # Raises an error if outside of configuration space
        configuration.check_valid_configuration()
        observation = objective(suggestion["x"])
        assert isinstance(observation, pd.DataFrame)
        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

    (best_config, best_score, best_context) = optimizer.get_best_observations()
    assert isinstance(best_config, pd.DataFrame)
    assert isinstance(best_score, pd.DataFrame)
    assert best_context is None
    assert set(best_config.columns) == {"x", "y", "z"}
    assert set(best_score.columns) == {"score"}
    assert best_config.shape == (1, 3)
    assert best_score.shape == (1, 1)
    assert best_score.score.iloc[0] < -5

    (all_configs, all_scores, all_contexts) = optimizer.get_observations()
    assert isinstance(all_configs, pd.DataFrame)
    assert isinstance(all_scores, pd.DataFrame)
    assert all_contexts is None
    assert set(all_configs.columns) == {"x", "y", "z"}
    assert set(all_scores.columns) == {"score"}
    assert all_configs.shape == (20, 3)
    assert all_scores.shape == (20, 1)

    # It would be better to put this into bayesian_optimizer_test but then we'd have
    # to refit the model
    if isinstance(optimizer, BaseBayesianOptimizer):
        pred_best = optimizer.surrogate_predict(configs=best_config)
        assert pred_best.shape == (1,)

        pred_all = optimizer.surrogate_predict(configs=all_configs)
        assert pred_all.shape == (20,)


@pytest.mark.parametrize(
    ("optimizer_type"),
    [
        # Enumerate all supported Optimizers
        # *[member for member in OptimizerType],
        *list(OptimizerType),
    ],
)
def test_concrete_optimizer_type(optimizer_type: OptimizerType) -> None:
    """Test that all optimizer types are listed in the ConcreteOptimizer constraints."""
    # pylint: disable=no-member
    assert optimizer_type.value in ConcreteOptimizer.__constraints__


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_create_optimizer_with_factory_method(
    configuration_space: CS.ConfigurationSpace,
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Test that we can create an optimizer via a factory."""
    if kwargs is None:
        kwargs = {}
    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=configuration_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )
    assert optimizer is not None

    assert optimizer.parameter_space is not None

    suggestion = optimizer.suggest()
    assert suggestion is not None

    if optimizer_type is not None:
        myrepr = repr(optimizer)
        assert myrepr.startswith(optimizer_type.value.__name__)


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
        (
            OptimizerType.SMAC,
            {
                # Test with default config.
                "use_default_config": True,
                # 'n_random_init': 10,
            },
        ),
    ],
)
def test_optimizer_with_llamatune(optimizer_type: OptimizerType, kwargs: Optional[dict]) -> None:
    """Toy problem to test the optimizers with llamatune space adapter."""
    # pylint: disable=too-complex,disable=too-many-statements,disable=too-many-locals
    num_iters = 50
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.DataFrame:
        # The best value can be reached by tuning a 1-dimensional search space.
        ret = pd.DataFrame({"score": np.sin(point.x * point.y)})
        assert ret.score.hasnans is False
        return ret
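
    # Along a 1-D slice such as x == y, the product x * y can still reach
    # 3 * pi / 2 ≈ 4.71 (at x = y ≈ 2.17), where sin(x * y) attains its minimum
    # of -1, so the reduced LlamaTune space can reach the same best score as the
    # full 2-D space.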

    input_space = CS.ConfigurationSpace(seed=1234)
    # Add two continuous inputs
    input_space.add(CS.UniformFloatHyperparameter(name="x", lower=0, upper=3))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0, upper=3))

    # Initialize an optimizer that uses LlamaTune space adapter
    space_adapter_kwargs = {
        "num_low_dims": 1,
        "special_param_values": None,
        "max_unique_values_per_param": None,
    }

    # Make some adjustments to the kwargs for the optimizer and LlamaTuned
    # optimizer for debug/testing.

    # if optimizer_type == OptimizerType.SMAC:
    #     # Allow us to override the number of random init samples.
    #     kwargs['max_ratio'] = 1.0
    optimizer_kwargs = deepcopy(kwargs)
    llamatune_optimizer_kwargs = deepcopy(kwargs)
    # if optimizer_type == OptimizerType.SMAC:
    #     optimizer_kwargs['n_random_init'] = 20
    #     llamatune_optimizer_kwargs['n_random_init'] = 10

    llamatune_optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=llamatune_optimizer_kwargs,
        space_adapter_type=SpaceAdapterType.LLAMATUNE,
        space_adapter_kwargs=space_adapter_kwargs,
    )
    # Initialize an optimizer that uses the original space
    optimizer: BaseOptimizer = OptimizerFactory.create(
        parameter_space=input_space,
        optimization_targets=["score"],
        optimizer_type=optimizer_type,
        optimizer_kwargs=optimizer_kwargs,
    )
    assert optimizer is not None
    assert llamatune_optimizer is not None
    assert optimizer.optimizer_parameter_space != llamatune_optimizer.optimizer_parameter_space

    llamatune_n_random_init = 0
    opt_n_random_init = int(kwargs.get("n_random_init", 0))
    if optimizer_type == OptimizerType.SMAC:
        assert isinstance(optimizer, SmacOptimizer)
        assert isinstance(llamatune_optimizer, SmacOptimizer)
        opt_n_random_init = optimizer.n_random_init
        llamatune_n_random_init = llamatune_optimizer.n_random_init

    for i in range(num_iters):
        # Place to set a breakpoint for when the optimizer is done with random init.
        if llamatune_n_random_init and i > llamatune_n_random_init:
            _LOG.debug("LlamaTuned Optimizer is done with random init.")
        if opt_n_random_init and i >= opt_n_random_init:
            _LOG.debug("Optimizer is done with random init.")

        # loop for optimizer
        suggestion, metadata = optimizer.suggest()
        observation = objective(suggestion)
        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

        # loop for llamatune-optimizer
        suggestion, metadata = llamatune_optimizer.suggest()
        _x, _y = suggestion["x"].iloc[0], suggestion["y"].iloc[0]
        # optimizer explores 1-dimensional space
        assert _x == pytest.approx(_y, rel=1e-3) or _x + _y == pytest.approx(3.0, rel=1e-3)
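        # (LlamaTune maps its single low dimension back into the 2-D space via
        # a random +/-1 linear projection, so every suggestion is expected to
        # land on one of the two diagonals x == y or x + y == 3 once rescaled
        # into [0, 3] x [0, 3].)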
        observation = objective(suggestion)
        llamatune_optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

    # Retrieve best observations
    best_observation = optimizer.get_best_observations()
    llamatune_best_observation = llamatune_optimizer.get_best_observations()

    for best_config, best_score, best_context in (best_observation, llamatune_best_observation):
        assert isinstance(best_config, pd.DataFrame)
        assert isinstance(best_score, pd.DataFrame)
        assert best_context is None
        assert set(best_config.columns) == {"x", "y"}
        assert set(best_score.columns) == {"score"}

    (best_config, best_score, _context) = best_observation
    (llamatune_best_config, llamatune_best_score, _context) = llamatune_best_observation

    # LlamaTune's best score should be better (i.e., lower) than the plain
    # optimizer's, or at least close to it.
    assert (
        best_score.score.iloc[0] > llamatune_best_score.score.iloc[0]
        or best_score.score.iloc[0] + 1e-3 > llamatune_best_score.score.iloc[0]
    )

    # Retrieve and check all observations
    for all_configs, all_scores, all_contexts in (
        optimizer.get_observations(),
        llamatune_optimizer.get_observations(),
    ):
        assert isinstance(all_configs, pd.DataFrame)
        assert isinstance(all_scores, pd.DataFrame)
        assert all_contexts is None
        assert set(all_configs.columns) == {"x", "y"}
        assert set(all_scores.columns) == {"score"}
        assert len(all_configs) == num_iters
        assert len(all_scores) == num_iters

    # .surrogate_predict method not currently implemented if space adapter is employed
    if isinstance(llamatune_optimizer, BaseBayesianOptimizer):
        with pytest.raises(NotImplementedError):
            llamatune_optimizer.surrogate_predict(configs=llamatune_best_config)


# Dynamically determine all of the optimizers we have implemented.
# Note: these must be sorted.
optimizer_subclasses: List[Type[BaseOptimizer]] = get_all_concrete_subclasses(
    BaseOptimizer,  # type: ignore[type-abstract]
    pkg_name="mlos_core",
)
assert optimizer_subclasses
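# (Presumably the list must be sorted so that the parametrized test IDs derived
# from it are deterministic, e.g. for parallel test runners.)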


@pytest.mark.parametrize(("optimizer_class"), optimizer_subclasses)
def test_optimizer_type_defs(optimizer_class: Type[BaseOptimizer]) -> None:
    """Test that all optimizer classes are listed in the OptimizerType enum."""
    optimizer_type_classes = {member.value for member in OptimizerType}
    assert optimizer_class in optimizer_type_classes


@pytest.mark.parametrize(
    ("optimizer_type", "kwargs"),
    [
        # Default optimizer
        (None, {}),
        # Enumerate all supported Optimizers
        *[(member, {}) for member in OptimizerType],
        # Optimizer with non-empty kwargs argument
    ],
)
def test_mixed_numerics_type_input_space_types(
    optimizer_type: Optional[OptimizerType],
    kwargs: Optional[dict],
) -> None:
    """Toy problem to test the optimizers with mixed numeric types to ensure that
    original dtypes are retained.
    """
    # pylint: disable=too-many-locals
    max_iterations = 10
    if kwargs is None:
        kwargs = {}

    def objective(point: pd.DataFrame) -> pd.DataFrame:
        # A mix of hyperparameter types; the optimum is to select the highest possible values.
        return pd.DataFrame({"score": point["x"] + point["y"]})

    input_space = CS.ConfigurationSpace(seed=SEED)
    # add a mix of numeric datatypes
    input_space.add(CS.UniformIntegerHyperparameter(name="x", lower=0, upper=5))
    input_space.add(CS.UniformFloatHyperparameter(name="y", lower=0.0, upper=5.0))

    if optimizer_type is None:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_kwargs=kwargs,
        )
    else:
        optimizer = OptimizerFactory.create(
            parameter_space=input_space,
            optimization_targets=["score"],
            optimizer_type=optimizer_type,
            optimizer_kwargs=kwargs,
        )

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_best_observations()

    with pytest.raises(ValueError, match="No observations"):
        optimizer.get_observations()

    for _ in range(max_iterations):
        suggestion, metadata = optimizer.suggest()
        assert isinstance(suggestion, pd.DataFrame)
        assert (suggestion.columns == ["x", "y"]).all()
        # Check suggestion values are the expected dtype
        assert isinstance(suggestion["x"].iloc[0], np.integer)
        assert isinstance(suggestion["y"].iloc[0], np.floating)
        # Check that suggestion is in the space
        test_configuration = CS.Configuration(
            optimizer.parameter_space, suggestion.astype("O").iloc[0].to_dict()
        )
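        # (The astype("O") above converts the row's numpy scalars to plain
        # Python objects, which is presumably what CS.Configuration expects for
        # value validation.)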
        # Raises an error if outside of configuration space
        test_configuration.check_valid_configuration()
        # Test registering the suggested configuration with a score.
        observation = objective(suggestion)
        assert isinstance(observation, pd.DataFrame)
        optimizer.register(configs=suggestion, scores=observation, metadata=metadata)

    (best_config, best_score, best_context) = optimizer.get_best_observations()
    assert isinstance(best_config, pd.DataFrame)
    assert isinstance(best_score, pd.DataFrame)
    assert best_context is None

    (all_configs, all_scores, all_contexts) = optimizer.get_observations()
    assert isinstance(all_configs, pd.DataFrame)
    assert isinstance(all_scores, pd.DataFrame)
    assert all_contexts is None