Coverage for mlos_core/mlos_core/optimizers/flaml_optimizer.py: 96%


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Contains the :py:class:`.FlamlOptimizer` class.

Notes
-----
See the `Flaml Documentation <https://microsoft.github.io/FLAML/>`_ for more
details.
"""

from typing import Dict, List, NamedTuple, Optional, Tuple, Union
from warnings import warn

import ConfigSpace
import numpy as np
import pandas as pd

from mlos_core.optimizers.optimizer import BaseOptimizer
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import drop_nulls, normalize_config


class EvaluatedSample(NamedTuple):
    """A named tuple representing a sample that has been evaluated."""

    config: dict
    score: float


class FlamlOptimizer(BaseOptimizer):
    """Wrapper class for FLAML Optimizer: A fast library for AutoML and tuning."""

    # The name of an internal objective attribute that is calculated as a weighted
    # average of the user provided objective metrics.
    _METRIC_NAME = "FLAML_score"
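
    # For example (hypothetical numbers): with optimization_targets=["latency", "cost"]
    # and objective_weights=[0.7, 0.3], an observation of latency=10.0 and cost=2.0
    # is folded into FLAML_score = 0.7 * 10.0 + 0.3 * 2.0 = 7.6 by the weighted
    # np.average() call in _register() below.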

    def __init__(
        self,
        *,  # pylint: disable=too-many-arguments
        parameter_space: ConfigSpace.ConfigurationSpace,
        optimization_targets: List[str],
        objective_weights: Optional[List[float]] = None,
        space_adapter: Optional[BaseSpaceAdapter] = None,
        low_cost_partial_config: Optional[dict] = None,
        seed: Optional[int] = None,
    ):
        """
        Create an MLOS wrapper for FLAML.

        Parameters
        ----------
        parameter_space : ConfigSpace.ConfigurationSpace
            The parameter space to optimize.

        optimization_targets : List[str]
            The names of the optimization targets to minimize.

        objective_weights : Optional[List[float]]
            Optional list of weights of optimization targets.

        space_adapter : Optional[BaseSpaceAdapter]
            The space adapter class to employ for parameter space transformations.

        low_cost_partial_config : Optional[dict]
            A dictionary from a subset of controlled dimensions to the initial low-cost values.
            More info:
            https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune

        seed : Optional[int]
            If provided, calls np.random.seed() with the provided value to set the
            seed globally at init.
        """
        super().__init__(
            parameter_space=parameter_space,
            optimization_targets=optimization_targets,
            objective_weights=objective_weights,
            space_adapter=space_adapter,
        )

        # Per upstream documentation, it is recommended to set the seed for
        # flaml at the start of its operation globally.
        if seed is not None:
            np.random.seed(seed)

        # pylint: disable=import-outside-toplevel
        from mlos_core.spaces.converters.flaml import (
            FlamlDomain,
            configspace_to_flaml_space,
        )

        self.flaml_parameter_space: Dict[str, FlamlDomain] = configspace_to_flaml_space(
            self.optimizer_parameter_space
        )
        self.low_cost_partial_config = low_cost_partial_config

        self.evaluated_samples: Dict[ConfigSpace.Configuration, EvaluatedSample] = {}
        self._suggested_config: Optional[dict]

    def _register(
        self,
        *,
        configs: pd.DataFrame,
        scores: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        """
        Registers the given configs and scores.

        Parameters
        ----------
        configs : pd.DataFrame
            Dataframe of configs / parameters. The columns are parameter names and
            the rows are the configs.

        scores : pd.DataFrame
            Scores from running the configs. The index is the same as the index of the configs.

        context : None
            Not Yet Implemented.

        metadata : None
            Not Yet Implemented.
        """
        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
        if metadata is not None:
            warn(f"Not Implemented: Ignoring metadata {list(metadata.columns)}", UserWarning)

        for (_, config), (_, score) in zip(configs.astype("O").iterrows(), scores.iterrows()):
            # Remove None values for inactive config parameters
            config_dict = drop_nulls(config.to_dict())
            cs_config: ConfigSpace.Configuration = ConfigSpace.Configuration(
                self.optimizer_parameter_space,
                values=config_dict,
            )
            if cs_config in self.evaluated_samples:
                warn(f"Configuration {config} was already registered", UserWarning)
            self.evaluated_samples[cs_config] = EvaluatedSample(
                config=config_dict,
                score=float(np.average(score.astype(float), weights=self._objective_weights)),
            )

    def _suggest(
        self,
        *,
        context: Optional[pd.DataFrame] = None,
    ) -> Tuple[pd.DataFrame, Optional[pd.DataFrame]]:
        """
        Suggests a new configuration.

        The configuration is produced by a freshly warm-started FLAML instance
        (see `_get_next_config()`).

        Parameters
        ----------
        context : None
            Not Yet Implemented.

        Returns
        -------
        configuration : pd.DataFrame
            Pandas dataframe with a single row. Column names are the parameter names.

        metadata : None
            Not implemented.
        """
        if context is not None:
            warn(f"Not Implemented: Ignoring context {list(context.columns)}", UserWarning)
        config: dict = self._get_next_config()
        return pd.DataFrame(config, index=[0]), None

    def register_pending(
        self,
        *,
        configs: pd.DataFrame,
        context: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
    ) -> None:
        raise NotImplementedError()

    def _target_function(self, config: dict) -> Union[dict, None]:
        """
        Configuration evaluation function called by FLAML optimizer.

        FLAML may suggest the same configuration multiple times (due to its
        warm-start mechanism). Once FLAML suggests an unseen configuration, we
        store it and stop the optimization process.

        Parameters
        ----------
        config : dict
            Next configuration to be evaluated, as suggested by FLAML.
            This config is stored internally and is returned to the user via
            the `.suggest()` method.

        Returns
        -------
        result : Union[dict, None]
            Dictionary with a single key, `FLAML_score`, if the config has
            already been evaluated; `None` otherwise.
        """
        cs_config = normalize_config(self.optimizer_parameter_space, config)
        if cs_config in self.evaluated_samples:
            return {self._METRIC_NAME: self.evaluated_samples[cs_config].score}

        self._suggested_config = dict(cs_config)  # Cleaned-up version of the config
        return None  # Returning None stops the process

    def _get_next_config(self) -> dict:
        """
        Warm-starts a new instance of FLAML, and returns a recommended, unseen new
        configuration.

        Since FLAML does not provide an ask-and-tell interface, we need to create a
        new instance of FLAML each time we are asked for a new suggestion. This is
        suboptimal performance-wise, but it works.
        To do so, we use any previously evaluated configs to bootstrap FLAML (i.e.,
        warm-start).
        For more info:
        https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function#warm-start

        Returns
        -------
        result : dict
            A new, previously unseen configuration suggested by FLAML, as a
            dictionary of parameter names to values.

        Raises
        ------
        RuntimeError
            If FLAML did not suggest a previously unseen configuration.
        """
        from flaml import tune  # pylint: disable=import-outside-toplevel

        # Parse evaluated configs to format used by FLAML
        points_to_evaluate: list = []
        evaluated_rewards: list = []
        if len(self.evaluated_samples) > 0:
            points_to_evaluate = [
                dict(normalize_config(self.optimizer_parameter_space, conf))
                for conf in self.evaluated_samples
            ]
            evaluated_rewards = [s.score for s in self.evaluated_samples.values()]

        # Warm start FLAML optimizer
        self._suggested_config = None
        tune.run(
            self._target_function,
            config=self.flaml_parameter_space,
            mode="min",
            metric=self._METRIC_NAME,
            points_to_evaluate=points_to_evaluate,
            evaluated_rewards=evaluated_rewards,
            num_samples=len(points_to_evaluate) + 1,
            low_cost_partial_config=self.low_cost_partial_config,
            verbose=0,
        )
        if self._suggested_config is None:
            raise RuntimeError("FLAML did not produce a suggestion")

        return self._suggested_config  # type: ignore[unreachable]
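

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustrative only, not part of the upstream module).
# It exercises the public `suggest()` / `register()` API inherited from
# BaseOptimizer, which wraps the `_suggest()` / `_register()` methods above.
# The parameter space, toy objective, and iteration count are all hypothetical.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # A one-dimensional continuous space, built with ConfigSpace's dict shorthand.
    space = ConfigSpace.ConfigurationSpace({"x": (-5.0, 5.0)}, seed=42)

    optimizer = FlamlOptimizer(
        parameter_space=space,
        optimization_targets=["score"],
        seed=42,
    )

    for _ in range(5):
        # `suggest()` returns a (configs, metadata) pair; configs is a one-row DataFrame.
        suggestion, _metadata = optimizer.suggest()
        x = float(suggestion["x"].iloc[0])
        # Toy objective to minimize: (x - 1)^2.
        scores = pd.DataFrame({"score": [(x - 1.0) ** 2]})
        optimizer.register(configs=suggestion, scores=scores)
        print(f"x={x:.3f} -> score={scores['score'].iloc[0]:.3f}")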