Coverage for mlos_core/mlos_core/tests/optimizers/one_hot_test.py: 100%
47 statements
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Tests for one-hot encoding for certain optimizers."""
7import ConfigSpace as CS
8import numpy as np
9import numpy.typing as npt
10import pandas as pd
11import pytest
13from mlos_core.optimizers import BaseOptimizer, SmacOptimizer
15# pylint: disable=protected-access,redefined-outer-name
@pytest.fixture
def data_frame() -> pd.DataFrame:
    """
    Build the toy table of configurations used by the data frame tests.

    It has three rows with a string column `y`, a float column `x` and an integer
    column `z`, meant to match the hyperparameters of the `configuration_space`
    fixture (defined outside this file).

    The columns are intentionally declared out of alphabetic order (`y`, `x`, `z`).
    """
    columns = {
        "y": ["a", "b", "c"],
        "x": [0.1, 0.2, 0.3],
        "z": [1, 5, 8],
    }
    return pd.DataFrame(columns)
@pytest.fixture
def one_hot_data_frame() -> npt.NDArray:
    """
    Expected one-hot encoding of the `data_frame` fixture.

    Each row holds the `x` value, then one indicator per `y` category
    (`a`, `b`, `c`), then the `z` value. Per the original note, this column order
    follows the order of the hyperparameters in `configuration_space`.
    """
    encoded_rows = [
        [0.1, 1.0, 0.0, 0.0, 1.0],
        [0.2, 0.0, 1.0, 0.0, 5.0],
        [0.3, 0.0, 0.0, 1.0, 8.0],
    ]
    return np.array(encoded_rows)
@pytest.fixture
def series() -> pd.Series:
    """
    Build a toy single configuration used by the series tests.

    It carries one value for each of `y`, `x` and `z`, meant to match the
    hyperparameters of the `configuration_space` fixture (defined outside this
    file).

    The index entries are intentionally declared out of alphabetic order.
    """
    values = {
        "y": "b",
        "x": 0.4,
        "z": 3,
    }
    return pd.Series(values)
@pytest.fixture
def one_hot_series() -> npt.NDArray:
    """
    Expected one-hot encoding of the `series` fixture.

    This is a single-row, two-dimensional array holding the `x` value, then one
    indicator per `y` category (only `b` is set), then the `z` value. Per the
    original note, this column order follows the order of the hyperparameters in
    `configuration_space`.
    """
    encoded_row = [0.4, 0.0, 1.0, 0.0, 3.0]
    return np.array([encoded_row])
@pytest.fixture
def optimizer(configuration_space: CS.ConfigurationSpace) -> BaseOptimizer:
    """
    Provide the optimizer whose one-hot helpers are under test.

    A `SmacOptimizer` is built over the given `configuration_space` with a single
    optimization target named "score"; the tests only use it to exercise one-hot
    encoding and decoding.
    """
    smac_optimizer = SmacOptimizer(
        parameter_space=configuration_space,
        optimization_targets=["score"],
    )
    return smac_optimizer
def test_to_1hot_data_frame(
    optimizer: BaseOptimizer,
    data_frame: pd.DataFrame,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """Check that encoding the toy data frame gives the expected one-hot array."""
    encoded = optimizer._to_1hot(config=data_frame)
    expected = pytest.approx(one_hot_data_frame)
    assert encoded == expected
def test_to_1hot_series(
    optimizer: BaseOptimizer,
    series: pd.Series,
    one_hot_series: npt.NDArray,
) -> None:
    """Check that encoding the toy series gives the expected one-hot array."""
    encoded = optimizer._to_1hot(config=series)
    expected = pytest.approx(one_hot_series)
    assert encoded == expected
def test_from_1hot_data_frame(
    optimizer: BaseOptimizer,
    data_frame: pd.DataFrame,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """Check that decoding the one-hot array reproduces the toy data frame."""
    decoded = optimizer._from_1hot(config=one_hot_data_frame)
    # Compare as plain dictionaries, so values and index labels must match exactly.
    expected_dict = data_frame.to_dict()
    assert decoded.to_dict() == expected_dict
def test_from_1hot_series(
    optimizer: BaseOptimizer,
    series: pd.Series,
    one_hot_series: npt.NDArray,
) -> None:
    """Check that decoding the one-row one-hot array reproduces the toy series."""
    decoded = optimizer._from_1hot(config=one_hot_series)
    # The decoder returns a table, so a single encoded row must yield one row.
    n_rows = decoded.shape[0]
    assert n_rows == 1, f"Unexpected number of rows ({n_rows} != 1)"
    first_row = decoded.iloc[0]
    assert first_row.to_dict() == series.to_dict()
def test_round_trip_data_frame(optimizer: BaseOptimizer, data_frame: pd.DataFrame) -> None:
    """Encode the toy data frame, decode it again, and expect the same columns back."""
    encoded = optimizer._to_1hot(config=data_frame)
    decoded = optimizer._from_1hot(config=encoded)
    # The float column is compared approximately; the others must match exactly.
    assert decoded["x"].to_numpy() == pytest.approx(data_frame["x"])
    assert (decoded["y"] == data_frame["y"]).all()
    assert (decoded["z"] == data_frame["z"]).all()
def test_round_trip_series(optimizer: BaseOptimizer, series: pd.Series) -> None:
    """
    Round-trip test for one-hot-encoding and then decoding a series.

    The `series` fixture is a `pd.Series`. The decoded result is used as a data
    frame (column access plus `.all()`), so it is named accordingly.
    """
    df_round_trip = optimizer._from_1hot(config=optimizer._to_1hot(config=series))
    # The float value is compared approximately; the others must match exactly.
    assert df_round_trip.x.to_numpy() == pytest.approx(series.x)
    assert (df_round_trip.y == series.y).all()
    assert (df_round_trip.z == series.z).all()
def test_round_trip_reverse_data_frame(
    optimizer: BaseOptimizer,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """Decode the multi-row one-hot array, encode it again, and expect the same array."""
    decoded = optimizer._from_1hot(config=one_hot_data_frame)
    re_encoded = optimizer._to_1hot(config=decoded)
    assert re_encoded == pytest.approx(one_hot_data_frame)
def test_round_trip_reverse_series(optimizer: BaseOptimizer, one_hot_series: npt.NDArray) -> None:
    """Decode the single-row one-hot array, encode it again, and expect the same array."""
    decoded = optimizer._from_1hot(config=one_hot_series)
    re_encoded = optimizer._to_1hot(config=decoded)
    assert re_encoded == pytest.approx(one_hot_series)