Coverage for mlos_core/mlos_core/spaces/adapters/llamatune.py: 95%

#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Implementation of LlamaTune space adapter.

LlamaTune is a technique that transforms the original parameter space into a
lower-dimensional space in order to improve the sample efficiency of the
underlying optimizer, by exploiting the parameter sensitivity correlations
inherent in most systems.

See Also: `LlamaTune: Sample-Efficient DBMS Configuration Tuning
<https://www.microsoft.com/en-us/research/publication/llamatune-sample-efficient-dbms-configuration-tuning>`_.
"""

import os
from importlib.metadata import version
from typing import Any
from warnings import warn

import ConfigSpace
import ConfigSpace.exceptions
import numpy as np
import numpy.typing as npt
import pandas as pd
from ConfigSpace.hyperparameters import NumericalHyperparameter
from packaging.version import Version
from sklearn.preprocessing import MinMaxScaler

from mlos_core.spaces.adapters.adapter import BaseSpaceAdapter
from mlos_core.util import normalize_config

_NUMPY_VERS = Version(version("numpy"))


class LlamaTuneAdapter(BaseSpaceAdapter):  # pylint: disable=too-many-instance-attributes
    """Implementation of LlamaTune, a set of parameter space transformation techniques,
    aimed at improving the sample-efficiency of the underlying optimizer.
    """

    DEFAULT_NUM_LOW_DIMS = 16
    """Default number of dimensions in the low-dimensional search space, generated by
    HeSBO projection.
    """

    DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE = 0.2
    """Default percentage of bias for each special parameter value."""

    DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM = 10000
    """Default maximum number of unique values per parameter, when space
    discretization is used.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        orig_parameter_space: ConfigSpace.ConfigurationSpace,
        num_low_dims: int = DEFAULT_NUM_LOW_DIMS,
        special_param_values: dict | None = None,
        max_unique_values_per_param: int | None = DEFAULT_MAX_UNIQUE_VALUES_PER_PARAM,
        use_approximate_reverse_mapping: bool = False,
    ):
        """
        Create a space adapter that employs LlamaTune's techniques.

        Parameters
        ----------
        orig_parameter_space : ConfigSpace.ConfigurationSpace
            The original (user-provided) parameter space to optimize.
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.
        special_param_values : dict | None
            Dictionary of special parameter values.
            Each key is the name of a parameter, and the value is either:
            - an integer (special value), or
            - a tuple of an integer and a float (special integer value and biasing percentage)
        max_unique_values_per_param : int | None
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None`, space discretization is disabled.
        use_approximate_reverse_mapping : bool
            Whether to use an approximate reverse mapping to help register
            configurations during resume.
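
        Examples
        --------
        A minimal illustrative sketch; the parameter names and ranges below are
        arbitrary stand-ins, not part of the API:

        >>> import ConfigSpace
        >>> orig_space = ConfigSpace.ConfigurationSpace(seed=1234)
        >>> _ = orig_space.add(
        ...     [ConfigSpace.UniformIntegerHyperparameter(f"p{i}", 1, 100) for i in range(8)]
        ... )
        >>> adapter = LlamaTuneAdapter(orig_parameter_space=orig_space, num_low_dims=2)
        >>> # The underlying optimizer explores only the 2-dimensional target space.
        >>> len(list(adapter.target_parameter_space.values()))
        2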

83 """ 

84 super().__init__(orig_parameter_space=orig_parameter_space) 

85 

86 if num_low_dims >= len(orig_parameter_space): 

87 raise ValueError( 

88 "Number of target config space dimensions should be " 

89 "less than those of original config space." 

90 ) 

91 

92 # Validate input special param values dict 

93 special_param_values = special_param_values or {} 

94 self._validate_special_param_values(special_param_values) 

95 

96 # Create low-dimensional parameter search space 

97 self._construct_low_dim_space(num_low_dims, max_unique_values_per_param) 

98 

99 # Initialize config values scaler: from (-1, 1) to (0, 1) range 

100 config_scaler = MinMaxScaler(feature_range=(0, 1)) 

101 ones_vector = np.ones(len(list(self.orig_parameter_space.values()))) 

102 config_scaler.fit(np.array([-ones_vector, ones_vector])) 

103 self._config_scaler = config_scaler 

104 

105 # Generate random mapping from low-dimensional space to original config space 

106 num_orig_dims = len(list(self.orig_parameter_space.values())) 

107 self._h_matrix = self._random_state.choice(range(num_low_dims), num_orig_dims) 

108 self._sigma_vector = self._random_state.choice([-1, 1], num_orig_dims) 
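        # NOTE: (h_matrix, sigma_vector) is a sparse representation of the HeSBO
        # projection matrix: original dimension `i` copies the value of low
        # dimension `h_matrix[i]`, negated when `sigma_vector[i] == -1`.
        # E.g. (hypothetical values), h_matrix=[1, 0, 1] and sigma_vector=[1, -1, 1]
        # project the low-dim point [a, b] to the orig-dim point [b, -a, b].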

109 

110 # Used to retrieve the low-dim point, given the high-dim one 

111 self._suggested_configs: dict[ConfigSpace.Configuration, ConfigSpace.Configuration] = {} 

112 self._pinv_matrix: npt.NDArray 

113 self._use_approximate_reverse_mapping = use_approximate_reverse_mapping 

114 

115 @property 

116 def target_parameter_space(self) -> ConfigSpace.ConfigurationSpace: 

117 """Get the parameter space, which is explored by the underlying optimizer.""" 

118 return self._target_config_space 

119 

120 def inverse_transform(self, configuration: pd.Series) -> pd.Series: 

121 config = ConfigSpace.Configuration( 

122 self.orig_parameter_space, 

123 values=configuration.dropna().to_dict(), 

124 ) 

125 

126 target_config = self._suggested_configs.get(config, None) 

127 # NOTE: HeSBO is a non-linear projection method, and does not inherently 

128 # support inverse projection 

129 # To (partly) support this operation, we keep track of the suggested 

130 # low-dim point(s) along with the respective high-dim point; this way we 

131 # can retrieve the low-dim point, from its high-dim counterpart. 

132 if target_config is None: 

133 # Inherently it is not supported to register points, which were not 

134 # suggested by the optimizer. 

135 if config == self.orig_parameter_space.get_default_configuration(): 

136 # Default configuration should always be registerable. 

137 pass 

138 elif not self._use_approximate_reverse_mapping: 

139 raise ValueError( 

140 f"{repr(config)}\n" 

141 "The above configuration was not suggested by the optimizer. " 

142 "Approximate reverse mapping is currently disabled; " 

143 "thus *only* configurations suggested " 

144 "previously by the optimizer can be registered." 

145 ) 

146 

147 target_config = self._try_inverse_transform_config(config) 

148 

149 return pd.Series(target_config, index=list(self.target_parameter_space.keys())) 

150 

    def _try_inverse_transform_config(
        self,
        config: ConfigSpace.Configuration,
    ) -> ConfigSpace.Configuration:
        """
        Attempts to generate an inverse mapping of the given configuration, which
        was not previously registered.

        Parameters
        ----------
        config : ConfigSpace.Configuration
            Configuration in the original high-dimensional space.

        Returns
        -------
        ConfigSpace.Configuration
            Configuration in the low-dimensional space.

        Raises
        ------
        ValueError
            On conversion errors.
        """

        # ...yet, we try to support that by implementing an approximate
        # reverse mapping using a pseudo-inverse matrix.
        if getattr(self, "_pinv_matrix", None) is None:
            self._try_generate_approx_inverse_mapping()

        # Replace NaNs with zeros for inactive hyperparameters
        config_vector = np.nan_to_num(config.get_array(), nan=0.0)
        # Perform approximate reverse mapping
        # NOTE: applying special value biasing is not possible
        vector: npt.NDArray = self._config_scaler.inverse_transform(np.array([config_vector]))[0]
        target_config_vector: npt.NDArray = self._pinv_matrix.dot(vector)
        # Clip values to the [-1, 1] range of the low-dimensional space.
        for idx, value in enumerate(target_config_vector):
            target_config_vector[idx] = np.clip(value, -1, 1)
        if self._q_scaler is not None:
            # If max_unique_values_per_param is set, we need to scale
            # the low-dimensional space back to the discretized space as well.
            target_config_vector = self._q_scaler.inverse_transform(
                np.array([target_config_vector])
            )[0]
            assert isinstance(target_config_vector, np.ndarray)
            # Clip values to the [1, max_value] range (floating point errors may occur).
            for idx, value in enumerate(target_config_vector):
                target_config_vector[idx] = int(np.clip(value, 1, self._q_scaler.data_max_[idx]))
            target_config_vector = target_config_vector.astype(int)
        # Convert the vector to a dictionary.
        target_config_dict = dict(
            zip(
                self.target_parameter_space.keys(),
                target_config_vector,
            )
        )
        target_config = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_config_dict,
            # This method results in hyperparameter type conversion issues
            # (e.g., float instead of int), so we use the values dict instead.
            # vector=target_config_vector,
        )

        # Check to see if the approximate reverse mapping looks OK.
        # Note: we know this isn't 100% accurate, so this is just a warning and
        # mostly meant for internal debugging.
        configuration_dict = dict(config)
        double_checked_config = self._transform(dict(target_config))
        double_checked_config = {
            # Skip the special values that aren't in the original space.
            k: v
            for k, v in double_checked_config.items()
            if k in configuration_dict
        }
        if double_checked_config != configuration_dict and (
            os.environ.get("MLOS_DEBUG", "false").lower() in {"1", "true", "y", "yes"}
        ):
            warn(
                (
                    f"Note: Configuration {configuration_dict} was inverse transformed to "
                    f"{dict(target_config)} and then back to {double_checked_config}. "
                    "This is an approximate reverse mapping for previously unregistered "
                    "configurations, so this is just a warning."
                ),
                UserWarning,
            )

        # But the inverse mapping should at least be valid in the target space.
        try:
            ConfigSpace.Configuration(
                self.target_parameter_space,
                values=target_config,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as err:
            raise ValueError(
                f"Invalid configuration {target_config} generated by "
                f"inverse mapping of {config}:\n{err}"
            ) from err

        return target_config

    def transform(self, configuration: pd.Series) -> pd.Series:
        target_values_dict = configuration.to_dict()
        target_configuration = ConfigSpace.Configuration(
            self.target_parameter_space,
            values=target_values_dict,
        )

        orig_values_dict = self._transform(target_values_dict)
        orig_configuration = normalize_config(self.orig_parameter_space, orig_values_dict)

        # Validate that the configuration is in the original space.
        try:
            ConfigSpace.Configuration(
                self.orig_parameter_space,
                values=orig_configuration,
            ).check_valid_configuration()
        except ConfigSpace.exceptions.IllegalValueError as err:
            raise ValueError(
                f"Invalid configuration {orig_configuration} generated by "
                f"transformation of {target_configuration}:\n{err}"
            ) from err

        # Add to the inverse dictionary -- needed for registering the performance later
        self._suggested_configs[orig_configuration] = target_configuration

        ret: pd.Series = pd.Series(
            list(orig_configuration.values()), index=list(orig_configuration.keys())
        )
        return ret

    def _construct_low_dim_space(
        self,
        num_low_dims: int,
        max_unique_values_per_param: int | None,
    ) -> None:
        """
        Constructs the (potentially discretized) low-dimensional parameter search space.

        Parameters
        ----------
        num_low_dims : int
            Number of dimensions used in the low-dimensional parameter search space.

        max_unique_values_per_param : int | None
            Number of unique values per parameter. Used to discretize the parameter space.
            If `None`, space discretization is disabled.
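
            For instance, with ``max_unique_values_per_param=10000``, each target
            dimension becomes an integer in ``[1, 10000]``, which the internal
            scaler maps back to the ``[-1, 1]`` range expected by the HeSBO
            projection. A sketch of that scaling, using sklearn directly on a
            toy ``max_value`` of 3:

            >>> import numpy as np
            >>> from sklearn.preprocessing import MinMaxScaler
            >>> q = MinMaxScaler(feature_range=(-1, 1))
            >>> _ = q.fit(np.array([[1.0], [3.0]]))  # fit on [1, max_value]
            >>> float(q.transform(np.array([[2.0]]))[0][0])  # midpoint maps to 0.0
            0.0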

298 """ 

299 # Define target space parameters 

300 q_scaler = None 

301 hyperparameters: list[ 

302 ConfigSpace.UniformFloatHyperparameter | ConfigSpace.UniformIntegerHyperparameter 

303 ] 

304 if max_unique_values_per_param is None: 

305 hyperparameters = [ 

306 ConfigSpace.UniformFloatHyperparameter(name=f"dim_{idx}", lower=-1, upper=1) 

307 for idx in range(num_low_dims) 

308 ] 

309 else: 

310 # Currently supported optimizers do not support defining a discretized 

311 # space (like ConfigSpace does using `q` kwarg). 

312 # Thus, to support space discretization, we define the low-dimensional 

313 # space using integer hyperparameters. 

314 # We also employ a scaler, which scales suggested values to [-1, 1] 

315 # range, used by HeSBO projection. 

316 hyperparameters = [ 

317 ConfigSpace.UniformIntegerHyperparameter( 

318 name=f"dim_{idx}", 

319 lower=1, 

320 upper=max_unique_values_per_param, 

321 ) 

322 for idx in range(num_low_dims) 

323 ] 

324 

325 # Initialize quantized values scaler: 

326 # from [0, max_unique_values_per_param] to (-1, 1) range 

327 q_scaler = MinMaxScaler(feature_range=(-1, 1)) 

328 ones_vector = np.ones(num_low_dims) 

329 max_value_vector = ones_vector * max_unique_values_per_param 

330 q_scaler.fit(np.array([ones_vector, max_value_vector])) 

331 

332 self._q_scaler = q_scaler 

333 

334 # Construct low-dimensional parameter search space 

335 config_space = ConfigSpace.ConfigurationSpace(name=self.orig_parameter_space.name) 

336 # use same random state as in original parameter space 

337 config_space.random = self._random_state 

338 config_space.add(hyperparameters) 

339 self._target_config_space = config_space 

340 

    def _transform(self, configuration: dict) -> dict:
        """
        Projects a low-dimensional point (configuration) to the high-dimensional
        original parameter space, and then biases the resulting parameter values
        towards their special value(s) (if any).

        Parameters
        ----------
        configuration : dict
            Configuration in the low-dimensional space.

        Returns
        -------
        configuration : dict
            Projected configuration in the high-dimensional original search space.
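
        Notes
        -----
        Sketch of the projection step (illustrative values only): each original
        dimension ``i`` receives ``sigma_vector[i] * low_dim_values[h_matrix[i]]``.

        >>> h_matrix = [1, 0, 1]      # hypothetical mapping: 3 orig dims, 2 low dims
        >>> sigma_vector = [1, -1, 1]
        >>> low_dim_values = [0.5, -0.25]
        >>> [s * low_dim_values[h] for s, h in zip(sigma_vector, h_matrix)]
        [-0.25, -0.5, -0.25]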

356 """ 

357 original_parameters = list(self.orig_parameter_space.values()) 

358 low_dim_config_values = list(configuration.values()) 

359 

360 if self._q_scaler is not None: 

361 # Scale parameter values from [1, max_value] to [-1, 1] 

362 low_dim_config_values = self._q_scaler.transform(np.array([low_dim_config_values]))[0] 

363 

364 # Project low-dim point to original parameter space 

365 original_config_values = [ 

366 self._sigma_vector[idx] * low_dim_config_values[self._h_matrix[idx]] 

367 for idx in range(len(original_parameters)) 

368 ] 

369 # Scale parameter values to [0, 1] 

370 original_config_values = self._config_scaler.transform(np.array([original_config_values]))[ 

371 0 

372 ] 

373 

374 original_config = {} 

375 for param, norm_value in zip(original_parameters, original_config_values): 

376 # Clip value to force it to fall in [0, 1] 

377 # NOTE: HeSBO projection ensures that theoretically but due to 

378 # floating point ops nuances this is not always guaranteed 

379 value = np.clip(norm_value, 0, 1) 

380 

381 if isinstance(param, ConfigSpace.CategoricalHyperparameter): 
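                # Map the normalized value to a choice index by truncation; e.g.
                # (illustrative), value=0.75 with 4 choices yields int(3.0) == 3,
                # i.e., the last choice, which the clamp below keeps in range.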

                index = int(value * len(param.choices))  # truncate integer part
                index = max(0, min(len(param.choices) - 1, index))
                # NOTE: potential rounding here would be unfair to first & last values
                orig_value = param.choices[index]
            elif isinstance(param, NumericalHyperparameter):
                if param.name in self._special_param_values_dict:
                    value = self._special_param_value_scaler(param, value)

                orig_value = param.to_value(value)
                orig_value = np.clip(orig_value, param.lower, param.upper)

                if _NUMPY_VERS >= Version("2.0"):
                    # Convert numpy types to native Python types (e.g., np.int64 to int).
                    # This was performed automatically in NumPy<2.0, but not anymore.
                    # See https://numpy.org/doc/stable/reference/generated/numpy.can_cast.html
                    orig_value = orig_value.item()

            else:
                raise NotImplementedError(
                    "Only Categorical, Integer, and Float hyperparameters are currently supported."
                )

            original_config[param.name] = orig_value

        return original_config

    def _special_param_value_scaler(
        self,
        param: NumericalHyperparameter,
        input_value: float,
    ) -> float:
        """
        Biases the special value(s) of this parameter, by shifting the normalized
        `input_value` towards them.

        Parameters
        ----------
        param : NumericalHyperparameter
            Parameter of the original parameter space.

        input_value : float
            Normalized value for this parameter, as suggested by the underlying optimizer.

        Returns
        -------
        biased_value : float
            Normalized value after special value(s) biasing is applied.
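
        Notes
        -----
        A worked example (hypothetical numbers): with a single special value
        biased at 20%, any ``input_value`` below 0.2 maps to the special value,
        while, e.g., ``input_value=0.5`` is rescaled uniformly to
        ``(0.5 - 0.2) / (1 - 0.2) = 0.375`` before being converted back to the
        parameter's vector representation.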

429 """ 

430 special_values_list = self._special_param_values_dict[param.name] 

431 

432 # Check if input value corresponds to some special value 

433 perc_sum = 0.0 

434 for special_value, biasing_perc in special_values_list: 

435 perc_sum += biasing_perc 

436 if input_value < perc_sum: 

437 return float(param.to_vector(special_value)) 

438 

439 # Scale input value uniformly to non-special values 

440 return float(param.to_vector((input_value - perc_sum) / (1 - perc_sum))) 

441 

    # pylint: disable=too-complex,too-many-branches
    def _validate_special_param_values(self, special_param_values_dict: dict) -> None:
        """
        Checks that the user-provided dict of special parameter values is valid,
        and assigns it to the corresponding attribute.

        Parameters
        ----------
        special_param_values_dict : dict
            User-provided dict of special parameter values.
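
            For example (parameter names here are purely hypothetical)::

                {
                    "buffer_size": 0,                     # single special value, default bias
                    "timeout_ms": (-1, 0.3),              # (special value, 30% bias) tuple
                    "retries": [0, 1],                    # list of special values
                    "pool_size": [(0, 0.1), (128, 0.2)],  # list of (value, bias) tuples
                }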

        Raises
        ------
        ValueError: if a dictionary key, value, or structure is invalid.
        NotImplementedError: if a special value is defined for a non-integer parameter.
        """

        error_prefix = "Validation of special parameter values dict failed."

        all_parameters = list(self.orig_parameter_space.keys())
        sanitized_dict = {}

        for param, value in special_param_values_dict.items():
            if param not in all_parameters:
                raise ValueError(error_prefix + f"Parameter '{param}' does not exist.")

            hyperparameter = self.orig_parameter_space[param]
            if not isinstance(hyperparameter, ConfigSpace.UniformIntegerHyperparameter):
                raise NotImplementedError(
                    error_prefix + f"Parameter '{param}' is not supported. "
                    "Only Integer Hyperparameters are currently supported."
                )

            if isinstance(value, int):
                # User specifies a single special value -- the default biasing percentage is used
                tuple_list = [(value, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE)]
            elif isinstance(value, tuple) and [type(v) for v in value] == [int, float]:
                # User specifies both the special value and the biasing percentage
                tuple_list = [value]
            elif isinstance(value, list) and value:
                if all(isinstance(t, int) for t in value):
                    # User specifies a list of special values
                    tuple_list = [
                        (v, self.DEFAULT_SPECIAL_PARAM_VALUE_BIASING_PERCENTAGE) for v in value
                    ]
                elif all(
                    isinstance(t, tuple) and [type(v) for v in t] == [int, float] for t in value
                ):
                    # User specifies a list of tuples; each tuple defines the special
                    # value and the biasing percentage
                    tuple_list = value
                else:
                    raise ValueError(
                        error_prefix + f"Invalid format in value list for parameter '{param}'. "
                        f"Special value list should contain either integers, "
                        "or (special value, biasing %) tuples."
                    )
            else:
                raise ValueError(
                    error_prefix + f"Invalid format for parameter '{param}'. Dict value should be "
                    "an int, an (int, float) tuple, a list of integers, "
                    "or a list of (int, float) tuples."
                )

            # Are the user-specified special values valid?
            if not all(hyperparameter.lower <= v <= hyperparameter.upper for v, _ in tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are outside of the value domain "
                    + f"of parameter '{param}'."
                )
            # Are the user-provided special values unique?
            if len({v for v, _ in tuple_list}) != len(tuple_list):
                raise ValueError(
                    error_prefix
                    + "One (or more) special values are defined more than once "
                    + f"for parameter '{param}'."
                )
            # Are the biasing percentages valid?
            if not all(0 < perc < 1 for _, perc in tuple_list):
                raise ValueError(
                    error_prefix
                    + f"One (or more) biasing percentages for parameter '{param}' are invalid: "
                    "i.e., they fall outside the (0, 1) range."
                )

            total_percentage = sum(perc for _, perc in tuple_list)
            if total_percentage >= 1.0:
                raise ValueError(
                    error_prefix
                    + f"Total special values percentage for parameter '{param}' surpasses 100%."
                )
            # ... and reasonable?
            if total_percentage >= 0.5:
                warn(
                    f"Total special values percentage for parameter '{param}' exceeds 50%.",
                    UserWarning,
                )

            sanitized_dict[param] = tuple_list

        self._special_param_values_dict = sanitized_dict

    def _try_generate_approx_inverse_mapping(self) -> None:
        """Tries to generate an approximate reverse mapping:
        i.e., from the high-dimensional space to the low-dimensional one.

        The reverse mapping is generated using the pseudo-inverse of the original
        HeSBO projection matrix.
        This mapping can potentially be used to register configurations that were
        *not* previously suggested by the optimizer.

        NOTE: This method is experimental, and there is currently no guarantee that
        it works as expected.

        Raises
        ------
        RuntimeError: if reverse mapping computation fails.
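
        Examples
        --------
        A small sketch (hypothetical values) of the dense-matrix construction and
        of the Moore-Penrose pseudo-inverse property relied upon below:

        >>> import numpy as np
        >>> from scipy.linalg import pinv
        >>> h_matrix, sigma_vector = [0, 1, 0], [1, -1, 1]  # 3 orig dims, 2 low dims
        >>> proj = np.zeros((3, 2))
        >>> for row, col in enumerate(h_matrix): proj[row][col] = sigma_vector[row]
        >>> bool(np.allclose(proj @ pinv(proj) @ proj, proj))
        True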

559 """ 

560 from scipy.linalg import ( # pylint: disable=import-outside-toplevel 

561 LinAlgError, 

562 pinv, 

563 ) 

564 

565 warn( 

566 ( 

567 "Trying to register a configuration that was not " 

568 "previously suggested by the optimizer.\n" 

569 "This inverse configuration transformation is typically not supported.\n" 

570 "However, we will try to register this configuration " 

571 "using an *experimental* method." 

572 ), 

573 UserWarning, 

574 ) 

575 

576 orig_space_num_dims = len(list(self.orig_parameter_space.values())) 

577 target_space_num_dims = len(list(self.target_parameter_space.values())) 

578 

579 # Construct dense projection matrix from sparse repr 

580 proj_matrix = np.zeros(shape=(orig_space_num_dims, target_space_num_dims)) 

581 for row, col in enumerate(self._h_matrix): 

582 proj_matrix[row][col] = self._sigma_vector[row] 

583 

584 # Compute pseudo-inverse matrix 

585 try: 

586 _inv = pinv(proj_matrix) 

587 assert _inv is not None and not isinstance(_inv, tuple) 

588 inv_matrix: npt.NDArray[np.floating[Any]] = _inv 

589 self._pinv_matrix = inv_matrix 

590 except LinAlgError as err: 

591 raise RuntimeError( 

592 f"Unable to generate reverse mapping using pseudo-inverse matrix: {repr(err)}" 

593 ) from err 

594 assert self._pinv_matrix.shape == (target_space_num_dims, orig_space_num_dims)