Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 97%

155 statements  

coverage.py v7.5.1, created at 2024-05-05 00:36 +0000

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Implementation of LlamaTune space adapter.
"""
from typing import Dict, Optional
from warnings import warn

import ConfigSpace
import numpy as np
import numpy.typing as npt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

from mlos_core.util import normalize_config
from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter


class LlamaTuneAdapter(BaseSpaceAdapter):   # pylint: disable=too-many-instance-attributes
    """
    Implementation of LlamaTune, a set of parameter space transformation techniques,
    aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by HeSBO projection."""

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = .2
    """Default biasing percentage applied to each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default (max) number of unique values per parameter, when space discretization is used."""

    def __init__(self, *,
                 orig_parameter_space: ConfigSpace.ConfigurationSpace,
                 num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
                 special_param_values: Optional[dict] = None,
                 max_unique_values_per_param: Optional[int] = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
                 use_approximate_reverse_mapping: bool = False):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
        special_param_values : Optional[dict]
            Dictionary of special parameter values to bias towards; maps a parameter name
            to a single special value, a (value, biasing %) tuple, or a list of either.
        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None`, space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether to enable the experimental approximate reverse mapping, which allows
            registering configurations that were not previously suggested by the optimizer.
        """
        super().__init__(orig_parameter_space=orig_parameter_space)

        if num_low_dims >= len(orig_parameter_space):
            raise ValueError("Number of target config space dimensions should be less than those of original config space.")

        # Validate input special param values dict
        special_param_values = special_param_values or {}
        self._validate_special_param_values(special_param_values)

        # Create low-dimensional parameter search space
        self._construct_low_dim_space(num_low_dims, max_unique_values_per_param)

        # Initialize config values scaler: from [-1, 1] to [0, 1] range
        config_scaler = MinMaxScaler(feature_range=(0, 1))
        ones_vector = np.ones(len(list(self.orig_parameter_space.values())))
        config_scaler.fit([-ones_vector, ones_vector])
        self._config_scaler = config_scaler

        # Generate random mapping from low-dimensional space to original config space
        num_orig_dims = len(list(self.orig_parameter_space.values()))
        self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims)
        self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims)
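        # Illustrative example (values are hypothetical): with num_low_dims == 2,
        # three original dims, _h_matrix == [0, 1, 0], and _sigma_vector == [1, -1, 1],
        # a low-dim point (x0, x1) projects to the high-dim point (x0, -x1, x0);
        # i.e., each original dim i takes the value sigma[i] * low_dim[h[i]].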

        # Used to retrieve the low-dim point, given the high-dim one
        self._suggested_configs: Dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {}
        self._pinv_matrix: npt.NDArray
        self._use_approximate_reverse_mapping = use_approximate_reverse_mapping

    @property
    def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace:
        """Get the parameter space, which is explored by the underlying optimizer."""
        return self._target_config_space

    def inverse_transform(self, configurations: pd.DataFrame) -> pd.DataFrame:
        target_configurations = []
        for (_, config) in configurations.astype('O').iterrows():
            configuration = ConfigSpace.Configuration(
                self.orig_parameter_space, values=config.to_dict())

            target_config = self._suggested_configs.get(configuration, None)
            # NOTE: HeSBO is a non-linear projection method, and does not inherently support inverse projection.
            # To (partly) support this operation, we keep track of the suggested low-dim point(s) along with the
            # respective high-dim point; this way we can retrieve the low-dim point from its high-dim counterpart.
            if target_config is None:
                # Inherently, registering points that were not previously suggested by the optimizer is not supported.
                if configuration == self.orig_parameter_space.get_default_configuration():
                    # Default configuration should always be registerable.
                    pass
                elif not self._use_approximate_reverse_mapping:
                    raise ValueError(f"{repr(configuration)}\n"
                                     "The above configuration was not suggested by the optimizer. "
                                     "Approximate reverse mapping is currently disabled; thus *only* configurations suggested "
                                     "previously by the optimizer can be registered.")

                # ...yet, we try to support that by implementing an approximate reverse mapping using the pseudo-inverse matrix.
                if getattr(self, '_pinv_matrix', None) is None:
                    self._try_generate_approx_inverse_mapping()

                # Replace NaNs with zeros for inactive hyperparameters
                config_vector = np.nan_to_num(configuration.get_array(), nan=0.0)
                # Perform approximate reverse mapping
                # NOTE: applying special value biasing is not possible
                vector = self._config_scaler.inverse_transform([config_vector])[0]
                target_config_vector = self._pinv_matrix.dot(vector)
                target_config = ConfigSpace.Configuration(self.target_parameter_space, vector=target_config_vector)

            target_configurations.append(target_config)

        return pd.DataFrame(target_configurations, columns=list(self.target_parameter_space.keys()))

    def transform(self, configuration: pd.DataFrame) -> pd.DataFrame:
        if len(configuration) != 1:
            raise ValueError("Configuration dataframe must contain exactly 1 row. "
                             f"Found {len(configuration)} rows.")

        target_values_dict = configuration.iloc[0].to_dict()
        target_configuration = ConfigSpace.Configuration(self.target_parameter_space, values=target_values_dict)

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Add to inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        return pd.DataFrame([list(orig_configuration.values())], columns=list(orig_configuration.keys()))

    def _construct_low_dim_space(self, num_low_dims: int, max_unique_values_per_param: Optional[int]) -> None:
        """Constructs the low-dimensional (potentially discretized) parameter search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : Optional[int]
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None`, space discretization is disabled.
        """
        # Define target space parameters
        q_scaler = None
        if max_unique_values_per_param is None:
            hyperparameters = [
                ConfigSpace.UniformFloatHyperparameter(name=f'dim_{idx}', lower=-1, upper=1)
                for idx in range(num_low_dims)
            ]
        else:
            # Currently supported optimizers do not support defining a discretized space (like ConfigSpace does using the `q` kwarg).
            # Thus, to support space discretization, we define the low-dimensional space using integer hyperparameters.
            # We also employ a scaler, which scales suggested values to the [-1, 1] range used by HeSBO projection.
            hyperparameters = [
                ConfigSpace.UniformIntegerHyperparameter(name=f'dim_{idx}', lower=1, upper=max_unique_values_per_param)
                for idx in range(num_low_dims)
            ]

            # Initialize quantized values scaler: from [1, max_unique_values_per_param] to [-1, 1] range
            q_scaler = MinMaxScaler(feature_range=(-1, 1))
            ones_vector = np.ones(num_low_dims)
            max_value_vector = ones_vector * max_unique_values_per_param
            q_scaler.fit([ones_vector, max_value_vector])
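            # e.g., with the default max_unique_values_per_param of 10000, a suggested
            # integer value of 1 is scaled to -1.0, 10000 to +1.0, and values in between
            # interpolate linearly across the [-1, 1] range.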

        self._q_scaler = q_scaler

        # Construct low-dimensional parameter search space
        config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name)
        config_space.random = self._random_state    # use same random state as in original parameter space
        config_space.add_hyperparameters(hyperparameters)
        self._target_config_space = config_space

    def _transform(self, configuration: dict) -> dict:
        """Projects a low-dimensional point (configuration) to the high-dimensional original parameter space,
        and then biases the resulting parameter values towards their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.
        """
        original_parameters = list(self.orig_parameter_space.values())
        low_dim_config_values = list(configuration.values())

        if self._q_scaler is not None:
            # Scale parameter values from [1, max_value] to [-1, 1]
            low_dim_config_values = self._q_scaler.transform([low_dim_config_values])[0]

        # Project low-dim point to original parameter space
        original_config_values = [
            self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]]
            for idx in range(len(original_parameters))
        ]
        # Scale parameter values to [0, 1]
        original_config_values = self._config_scaler.transform([original_config_values])[0]

        original_config = {}
        for param, norm_value in zip(original_parameters, original_config_values):
            # Clip value to force it to fall in [0, 1]
            # NOTE: HeSBO projection ensures this theoretically, but it is not always
            # guaranteed in practice due to floating-point nuances.
            value = max(0., min(1., norm_value))    # pylint: disable=redefined-loop-name

            if isinstance(param, ConfigSpace.CategoricalHyperparameter):
                index = int(value * len(param.choices))     # truncate the fractional part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, ConfigSpace.hyperparameters.NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param._transform(value)    # pylint: disable=protected-access
                orig_value = max(param.lower, min(param.upper, orig_value))
            else:
                raise NotImplementedError("Only Categorical, Integer, and Float hyperparameters are currently supported.")

            original_config[param.name] = orig_value

        return original_config

    def _special_param_value_scaler(self, param: ConfigSpace.UniformIntegerHyperparameter, input_value: float) -> float:
        """Biases the special value(s) of this parameter by shifting the normalized `input_value` towards them.

        Parameters
        ----------
        param : ConfigSpace.UniformIntegerHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
        """

        special_values_list = self._special_param_values_dict[param.name]

        # Check if input value corresponds to some special value
        perc_sum = 0.
        ret: float
        for special_value, biasing_perc in special_values_list:
            perc_sum += biasing_perc
            if input_value < perc_sum:
                ret = param._inverse_transform(special_value)   # pylint: disable=protected-access
                return ret

        # Scale input value uniformly to non-special values
        ret = param._inverse_transform(     # pylint: disable=protected-access
            param._transform_scalar((input_value - perc_sum) / (1 - perc_sum)))    # pylint: disable=protected-access
        return ret

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """Checks that the user-provided dict of special parameter values is valid,
        and assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.

        Raises
        ------
        ValueError: if dictionary key, value, or structure is invalid.
        NotImplementedError: if a special value is defined for a non-integer parameter.
        """

        error_prefix = "Validation of special parameter values dict failed."

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(error_prefix + f"Parameter '{param}' is not supported. "
                                          "Only Integer Hyperparameters are currently supported.")

            if isinstance(value, int):
                # User specifies a single special value -- default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both the special value and the biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies a list of special values
                    tuple_list = [(v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value]
                elif all(isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value):
                    # User specifies a list of tuples; each tuple defines the special value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(error_prefix + f"Invalid format in value list for parameter '{param}'. "
                                     "Special value list should contain either integers, or (special value, biasing %) tuples.")
            else:
                raise ValueError(error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                                 "an int, an (int, float) tuple, a list of integers, or a list of (int, float) tuples.")

            # Are user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(error_prefix + f"One (or more) special values are outside of parameter '{param}' value domain.")
            # Are user-provided special values unique?
            if len(set(v for v, _ in tuple_list)) != len(tuple_list):
                raise ValueError(error_prefix + f"One (or more) special values are defined more than once for parameter '{param}'.")
            # Are biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(error_prefix + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                                 "i.e., they fall outside the (0, 1) range.")

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.:
                raise ValueError(error_prefix + f"Total special values percentage for parameter '{param}' surpasses 100%.")
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(f"Total special values percentage for parameter '{param}' exceeds 50%.", UserWarning)

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping, i.e., from the high-dimensional space to the low-dimensional one.
        The reverse mapping is generated using the pseudo-inverse of the original HeSBO projection matrix.
        This mapping can potentially be used to register configurations that were *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
        """
        from scipy.linalg import pinv, LinAlgError      # pylint: disable=import-outside-toplevel

        warn("Trying to register a configuration that was not previously suggested by the optimizer. "
             "This inverse configuration transformation is typically not supported. "
             "However, we will try to register this configuration using an *experimental* method.", UserWarning)

        orig_space_num_dims = len(list(self.orig_parameter_space.values()))
        target_space_num_dims = len(list(self.target_parameter_space.values()))

        # Construct dense projection matrix from its sparse representation
        proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims))
        for row, col in enumerate(self._h_matrix):
            proj_matrix[row][col] = self._sigma_vector[row]
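        # e.g. (hypothetical values): with _h_matrix == [0, 1, 0] and _sigma_vector == [1, -1, 1],
        # proj_matrix == [[1, 0], [0, -1], [1, 0]]; its pseudo-inverse then maps a high-dim
        # vector back to the least-squares-closest low-dim point.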

        # Compute pseudo-inverse matrix
        try:
            self._pinv_matrix = pinv(proj_matrix)
        except LinAlgError as err:
            raise RuntimeError(f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}") from err
        assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)