Coverage for mlos_core/mlos_core/tests/optimizers/one_hot_test.py: 100%
53 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-05 00:36 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-05 00:36 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Tests for one-hot encoding for certain optimizers.
7"""
9import pytest
11import pandas as pd
12import numpy as np
13import numpy.typing as npt
14import ConfigSpace as CS
16from mlos_core.optimizers import SmacOptimizer
18# pylint: disable=protected-access,redefined-outer-name
@pytest.fixture
def data_frame() -> pd.DataFrame:
    """
    Small three-row data frame matching the `configuration_space` hyperparameters.

    Columns are intentionally declared as y, x, z (i.e. *not* alphabetically)
    so the tests exercise column re-ordering.
    """
    columns = {
        'y': ['a', 'b', 'c'],
        'x': [0.1, 0.2, 0.3],
        'z': [1, 5, 8],
    }
    return pd.DataFrame(columns)
@pytest.fixture
def one_hot_data_frame() -> npt.NDArray:
    """
    Expected one-hot encoding of the `data_frame` fixture.

    Each row is laid out as: the `x` value, three indicator columns for the
    `y` category ('a', 'b', 'c'), then the `z` value. This follows the order
    of the hyperparameters in `configuration_space`.
    """
    rows = [
        [0.1, 1.0, 0.0, 0.0, 1.0],
        [0.2, 0.0, 1.0, 0.0, 5.0],
        [0.3, 0.0, 0.0, 1.0, 8.0],
    ]
    return np.array(rows)
@pytest.fixture
def series() -> pd.Series:
    """
    Single-configuration series matching the `configuration_space` hyperparameters.

    Entries are intentionally declared as y, x, z (i.e. *not* alphabetically).
    """
    values = {
        'y': 'b',
        'x': 0.4,
        'z': 3,
    }
    return pd.Series(values)
@pytest.fixture
def one_hot_series() -> npt.NDArray:
    """
    Expected one-hot encoding of the `series` fixture, as a single-row 2D array.

    The row is laid out as: the `x` value, three indicator columns for the
    `y` category, then the `z` value. This follows the order of the
    hyperparameters in `configuration_space`.
    """
    single_row = [0.4, 0.0, 1.0, 0.0, 3.0]
    return np.array([single_row])
def test_to_1hot_data_frame(
    configuration_space: CS.ConfigurationSpace,
    data_frame: pd.DataFrame,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """
    Encoding the toy data frame must yield the expected one-hot array.
    """
    smac = SmacOptimizer(parameter_space=configuration_space)
    encoded = smac._to_1hot(data_frame)
    assert encoded == pytest.approx(one_hot_data_frame)
def test_to_1hot_series(
    configuration_space: CS.ConfigurationSpace,
    series: pd.Series,
    one_hot_series: npt.NDArray,
) -> None:
    """
    Encoding the toy series must yield the expected one-hot array.
    """
    smac = SmacOptimizer(parameter_space=configuration_space)
    encoded = smac._to_1hot(series)
    assert encoded == pytest.approx(one_hot_series)
def test_from_1hot_data_frame(
    configuration_space: CS.ConfigurationSpace,
    data_frame: pd.DataFrame,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """
    Decoding the one-hot array must reproduce the toy data frame.
    """
    smac = SmacOptimizer(parameter_space=configuration_space)
    decoded = smac._from_1hot(one_hot_data_frame)
    # Compare as plain dicts so that column order does not matter.
    assert decoded.to_dict() == data_frame.to_dict()
def test_from_1hot_series(
    configuration_space: CS.ConfigurationSpace,
    series: pd.Series,
    one_hot_series: npt.NDArray,
) -> None:
    """
    Decoding the single-row one-hot array must reproduce the toy series.
    """
    smac = SmacOptimizer(parameter_space=configuration_space)
    one_hot_df = smac._from_1hot(one_hot_series)
    # The decoded result is a data frame; it must hold exactly one row.
    assert one_hot_df.shape[0] == 1, f"Unexpected number of rows ({one_hot_df.shape[0]} != 1)"
    first_row = one_hot_df.iloc[0]
    assert first_row.to_dict() == series.to_dict()
def test_round_trip_data_frame(
    configuration_space: CS.ConfigurationSpace,
    data_frame: pd.DataFrame,
) -> None:
    """
    Encode a data frame to one-hot and decode it back; values must survive.
    """
    smac = SmacOptimizer(parameter_space=configuration_space)
    encoded = smac._to_1hot(data_frame)
    decoded = smac._from_1hot(encoded)
    # The float column is compared approximately, the others exactly.
    assert decoded.x.to_numpy() == pytest.approx(data_frame.x)
    assert (decoded.y == data_frame.y).all()
    assert (decoded.z == data_frame.z).all()
def test_round_trip_series(
    configuration_space: CS.ConfigurationSpace,
    series: pd.Series,
) -> None:
    """
    Round-trip test for one-hot-encoding and then decoding a series.

    The `series` fixture is a `pd.Series` (it was previously mis-annotated
    as `pd.DataFrame`). The decoded result is a data frame, so its row count
    is checked first: without that, the `.all()` comparisons below would
    pass vacuously on an empty frame.
    """
    optimizer = SmacOptimizer(parameter_space=configuration_space)
    series_round_trip = optimizer._from_1hot(optimizer._to_1hot(series))
    n_rows = series_round_trip.shape[0]
    assert n_rows == 1, f"Unexpected number of rows ({n_rows} != 1)"
    # The float entry is compared approximately, the others exactly.
    assert series_round_trip.x.to_numpy() == pytest.approx(series.x)
    assert (series_round_trip.y == series.y).all()
    assert (series_round_trip.z == series.z).all()
def test_round_trip_reverse_data_frame(
    configuration_space: CS.ConfigurationSpace,
    one_hot_data_frame: npt.NDArray,
) -> None:
    """
    Decode a multi-row one-hot array and re-encode it; the array must survive.
    """
    smac = SmacOptimizer(parameter_space=configuration_space)
    decoded = smac._from_1hot(one_hot_data_frame)
    re_encoded = smac._to_1hot(decoded)
    assert re_encoded == pytest.approx(one_hot_data_frame)
def test_round_trip_reverse_series(
    configuration_space: CS.ConfigurationSpace,
    one_hot_series: npt.NDArray,
) -> None:
    """
    Decode a single-row one-hot array and re-encode it; the array must survive.
    """
    smac = SmacOptimizer(parameter_space=configuration_space)
    decoded = smac._from_1hot(one_hot_series)
    re_encoded = smac._to_1hot(decoded)
    assert re_encoded == pytest.approx(one_hot_series)