Coverage for mlos_core/mlos_core/tests/optimizers/one_hot_test.py: 100%

53 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-05 00:36 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Tests for one-hot encoding for certain optimizers. 

7""" 

8 

9import pytest 

10 

11import pandas as pd 

12import numpy as np 

13import numpy.typing as npt 

14import ConfigSpace as CS 

15 

16from mlos_core.optimizers import SmacOptimizer 

17 

18# pylint: disable=protected-access,redefined-outer-name 

19 

20 

@pytest.fixture
def data_frame() -> pd.DataFrame:
    """
    Small example data frame matching the `configuration_space` hyperparameters.

    Three rows, with the columns intentionally listed out of alphabetic
    order (`y`, `x`, `z`).
    """
    column_values = {
        'y': ['a', 'b', 'c'],
        'x': [0.1, 0.2, 0.3],
        'z': [1, 5, 8],
    }
    return pd.DataFrame(column_values)

32 

33 

@pytest.fixture
def one_hot_data_frame() -> npt.NDArray:
    """
    Expected one-hot encoding of the `data_frame` fixture.

    Column order follows the hyperparameter order of `configuration_space`.
    Judging by the values: `x` first, then three indicator columns for `y`,
    then `z`.
    """
    encoded_rows = [
        [0.1, 1.0, 0.0, 0.0, 1.0],
        [0.2, 0.0, 1.0, 0.0, 5.0],
        [0.3, 0.0, 0.0, 1.0, 8.0],
    ]
    return np.array(encoded_rows)

45 

46 

@pytest.fixture
def series() -> pd.Series:
    """
    Small example series matching the `configuration_space` hyperparameters.

    The entries are intentionally listed out of alphabetic order
    (`y`, `x`, `z`).
    """
    entries = {
        'y': 'b',
        'x': 0.4,
        'z': 3,
    }
    return pd.Series(entries)

58 

59 

@pytest.fixture
def one_hot_series() -> npt.NDArray:
    """
    Expected one-hot encoding of the `series` fixture, as a single-row 2-D array.

    Column order follows the hyperparameter order of `configuration_space`.
    """
    encoded_row = [0.4, 0.0, 1.0, 0.0, 3.0]
    return np.array([encoded_row])

69 

70 

def test_to_1hot_data_frame(configuration_space: CS.ConfigurationSpace,
                            data_frame: pd.DataFrame, one_hot_data_frame: npt.NDArray) -> None:
    """
    Encode the toy data frame and compare it with the expected one-hot array.
    """
    encoded = SmacOptimizer(parameter_space=configuration_space)._to_1hot(data_frame)
    # Approximate comparison: the encoded values are floats.
    assert encoded == pytest.approx(one_hot_data_frame)

78 

79 

def test_to_1hot_series(configuration_space: CS.ConfigurationSpace,
                        series: pd.Series, one_hot_series: npt.NDArray) -> None:
    """
    Encode the toy series and compare it with the expected one-hot array.
    """
    encoded = SmacOptimizer(parameter_space=configuration_space)._to_1hot(series)
    # Approximate comparison: the encoded values are floats.
    assert encoded == pytest.approx(one_hot_series)

87 

88 

def test_from_1hot_data_frame(configuration_space: CS.ConfigurationSpace,
                              data_frame: pd.DataFrame, one_hot_data_frame: npt.NDArray) -> None:
    """
    Decode the expected one-hot array and compare it with the toy data frame.
    """
    decoded = SmacOptimizer(parameter_space=configuration_space)._from_1hot(one_hot_data_frame)
    # Both sides are compared as plain dicts (exact equality).
    assert decoded.to_dict() == data_frame.to_dict()

96 

97 

def test_from_1hot_series(configuration_space: CS.ConfigurationSpace,
                          series: pd.Series, one_hot_series: npt.NDArray) -> None:
    """
    Decode the single-row one-hot array and compare its only row with the toy series.
    """
    one_hot_df = SmacOptimizer(parameter_space=configuration_space)._from_1hot(one_hot_series)
    # The decoded result must hold exactly one row before that row is inspected.
    assert one_hot_df.shape[0] == 1, f"Unexpected number of rows ({one_hot_df.shape[0]} != 1)"
    first_row = one_hot_df.iloc[0]
    assert first_row.to_dict() == series.to_dict()

107 

108 

def test_round_trip_data_frame(configuration_space: CS.ConfigurationSpace, data_frame: pd.DataFrame) -> None:
    """
    Encode the data frame to one-hot, decode it back, and check every column survives.
    """
    optimizer = SmacOptimizer(parameter_space=configuration_space)
    encoded = optimizer._to_1hot(data_frame)
    df_round_trip = optimizer._from_1hot(encoded)
    # `x` holds floats, so it is compared approximately; `y` and `z` exactly.
    assert df_round_trip['x'].to_numpy() == pytest.approx(data_frame['x'])
    assert (df_round_trip['y'] == data_frame['y']).all()
    assert (df_round_trip['z'] == data_frame['z']).all()

118 

119 

def test_round_trip_series(configuration_space: CS.ConfigurationSpace, series: pd.Series) -> None:
    """
    Round-trip test for one-hot-encoding and then decoding a series.

    The `series` argument is supplied by the `series` fixture, which returns
    a `pd.Series` (hence the annotation). Each decoded column is compared
    against the corresponding scalar entry of the input series.
    """
    optimizer = SmacOptimizer(parameter_space=configuration_space)
    series_round_trip = optimizer._from_1hot(optimizer._to_1hot(series))
    # `x` is a float, so it is compared approximately; `y` and `z` exactly.
    assert series_round_trip.x.to_numpy() == pytest.approx(series.x)
    assert (series_round_trip.y == series.y).all()
    assert (series_round_trip.z == series.z).all()

129 

130 

def test_round_trip_reverse_data_frame(configuration_space: CS.ConfigurationSpace, one_hot_data_frame: npt.NDArray) -> None:
    """
    Decode the multi-row one-hot array, re-encode it, and expect the original array back.
    """
    optimizer = SmacOptimizer(parameter_space=configuration_space)
    decoded = optimizer._from_1hot(one_hot_data_frame)
    round_trip = optimizer._to_1hot(decoded)
    assert round_trip == pytest.approx(one_hot_data_frame)

138 

139 

def test_round_trip_reverse_series(configuration_space: CS.ConfigurationSpace, one_hot_series: npt.NDArray) -> None:
    """
    Decode the single-row one-hot array, re-encode it, and expect the original array back.
    """
    optimizer = SmacOptimizer(parameter_space=configuration_space)
    decoded = optimizer._from_1hot(one_hot_series)
    round_trip = optimizer._to_1hot(decoded)
    assert round_trip == pytest.approx(one_hot_series)