Coverage for mlos_core/mlos_core/optimizers/flaml_optimizer.py: 98%

52 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-06 00:35 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Contains the FlamlOptimizer class. 

7""" 

8 

9from typing import Dict, NamedTuple, Optional, Union 

10from warnings import warn 

11 

12import ConfigSpace 

13import numpy as np 

14import pandas as pd 

15 

16from mlos_core.util import normalize_config 

17from mlos_core.optimizers.optimizer import BaseOptimizer 

18from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter 

19 

20 

class EvaluatedSample(NamedTuple):
    """Pairs a configuration with the score observed when it was evaluated."""

    # Parameter values, keyed by parameter name.
    config: dict
    # Objective value obtained for this configuration.
    score: float

26 

27 

class FlamlOptimizer(BaseOptimizer):
    """Wrapper class for FLAML Optimizer: A fast library for AutoML and tuning.

    Parameters
    ----------
    parameter_space : ConfigSpace.ConfigurationSpace
        The parameter space to optimize.

    space_adapter : BaseSpaceAdapter
        The space adapter class to employ for parameter space transformations.

    low_cost_partial_config : dict
        A dictionary from a subset of controlled dimensions to the initial low-cost values.
        More info: https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune

    seed : Optional[int]
        If provided, calls np.random.seed() with the provided value to set the seed globally at init.
    """

    def __init__(self, *,
                 parameter_space: ConfigSpace.ConfigurationSpace,
                 space_adapter: Optional[BaseSpaceAdapter] = None,
                 low_cost_partial_config: Optional[dict] = None,
                 seed: Optional[int] = None):

        super().__init__(
            parameter_space=parameter_space,
            space_adapter=space_adapter,
        )

        # Per upstream documentation, it is recommended to set the seed for
        # flaml at the start of its operation globally.
        if seed is not None:
            np.random.seed(seed)

        # Imported lazily so that importing this module does not require flaml to be installed.
        # pylint: disable=import-outside-toplevel
        from mlos_core.spaces.converters.flaml import configspace_to_flaml_space, FlamlDomain

        # FLAML-format search space converted from the (possibly adapter-transformed) ConfigSpace.
        self.flaml_parameter_space: Dict[str, FlamlDomain] = configspace_to_flaml_space(self.optimizer_parameter_space)
        self.low_cost_partial_config = low_cost_partial_config

        # History of observed (config -> score) pairs; replayed to warm-start FLAML
        # on every call to suggest(). Dict insertion order is relied on to keep
        # configs and rewards aligned in _get_next_config().
        self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
        # Set as a side effect by _target_function() during tune.run();
        # read back by _get_next_config(). Declared here, assigned per-suggest.
        self._suggested_config: Optional[dict]

    def _register(self, configurations: pd.DataFrame, scores: pd.Series,
                  context: Optional[pd.DataFrame] = None) -> None:
        """Registers the given configurations and scores.

        Note: if a configuration was already registered, a UserWarning is issued
        and its stored score is overwritten with the new one.

        Parameters
        ----------
        configurations : pd.DataFrame
            Dataframe of configurations / parameters. The columns are parameter names and the rows are the configurations.

        scores : pd.Series
            Scores from running the configurations. The index is the same as the index of the configurations.

        context : None
            Not Yet Implemented.

        Raises
        ------
        NotImplementedError
            If a context is provided.
        """
        if context is not None:
            raise NotImplementedError()
        # astype('O') keeps values as Python objects so ConfigSpace receives
        # the original types rather than numpy scalars.
        for (_, config), score in zip(configurations.astype('O').iterrows(), scores):
            cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
                self.optimizer_parameter_space, values=config.to_dict())
            if cs_config in self.evaluated_samples:
                warn(f"Configuration {config} was already registered", UserWarning)

            self.evaluated_samples[cs_config] = EvaluatedSample(config=config.to_dict(), score=score)

    def _suggest(self, context: Optional[pd.DataFrame] = None) -> pd.DataFrame:
        """Suggests a new configuration.

        Suggested by FLAML, via a fresh tune.run() warm-started with all
        previously registered samples (see _get_next_config()).

        Parameters
        ----------
        context : None
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        Raises
        ------
        NotImplementedError
            If a context is provided.
        """
        if context is not None:
            raise NotImplementedError()
        config: dict = self._get_next_config()
        return pd.DataFrame(config, index=[0])

    def register_pending(self, configurations: pd.DataFrame,
                         context: Optional[pd.DataFrame] = None) -> None:
        """Not Yet Implemented: registering in-flight (not yet scored) configurations is unsupported."""
        raise NotImplementedError()

    def _target_function(self, config: dict) -> Union[dict, None]:
        """Configuration evaluation function called by FLAML optimizer.

        FLAML may suggest the same configuration multiple times (due to its warm-start mechanism).
        Once FLAML suggests an unseen configuration, we store it, and stop the optimization process.

        Parameters
        ----------
        config: dict
            Next configuration to be evaluated, as suggested by FLAML.
            This config is stored internally and is returned to user, via `.suggest()` method.

        Returns
        -------
        result: Union[dict, None]
            Dictionary with a single key, `score`, if config already evaluated; `None` otherwise.
        """
        cs_config = normalize_config(self.optimizer_parameter_space, config)
        if cs_config in self.evaluated_samples:
            # Known config: feed its recorded score back so FLAML keeps searching.
            return {'score': self.evaluated_samples[cs_config].score}

        self._suggested_config = dict(cs_config)  # Cleaned-up version of the config
        return None  # Returning None stops the process

    def _get_next_config(self) -> dict:
        """Warm-starts a new instance of FLAML, and returns a recommended, unseen new configuration.

        Since FLAML does not provide an ask-and-tell interface, we need to create a new instance of FLAML
        each time we get asked for a new suggestion. This is suboptimal performance-wise, but works.
        To do so, we use any previously evaluated configurations to bootstrap FLAML (i.e., warm-start).
        For more info: https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function#warm-start

        Returns
        -------
        result: dict
            A previously unseen configuration to evaluate next, as a dictionary
            of parameter names to values (populated by `_target_function`).

        Raises
        ------
        RuntimeError: if FLAML did not suggest a previously unseen configuration.
        """
        # Imported lazily so that importing this module does not require flaml to be installed.
        from flaml import tune  # pylint: disable=import-outside-toplevel

        # Parse evaluated configs to format used by FLAML
        points_to_evaluate: list = []
        evaluated_rewards: list = []
        if len(self.evaluated_samples) > 0:
            # Both lists iterate the same dict, so configs and rewards stay index-aligned.
            points_to_evaluate = [
                dict(normalize_config(self.optimizer_parameter_space, conf))
                for conf in self.evaluated_samples
            ]
            evaluated_rewards = [
                s.score for s in self.evaluated_samples.values()
            ]

        # Warm start FLAML optimizer
        self._suggested_config = None
        tune.run(
            self._target_function,
            config=self.flaml_parameter_space,
            mode='min',
            metric='score',
            points_to_evaluate=points_to_evaluate,
            evaluated_rewards=evaluated_rewards,
            # Replay the full history plus one extra trial, so FLAML is forced
            # to produce exactly one fresh suggestion beyond the known points.
            num_samples=len(points_to_evaluate) + 1,
            low_cost_partial_config=self.low_cost_partial_config,
            verbose=0,
        )
        if self._suggested_config is None:
            raise RuntimeError('FLAML did not produce a suggestion')

        return self._suggested_config  # type: ignore[unreachable]