Coverage for mlos_bench/mlos_bench/optimizers/mlos_core_optimizer.py: 98%


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
A wrapper for mlos_core optimizers for mlos_bench.
"""

import logging
import os

from types import TracebackType
from typing import Dict, Optional, Sequence, Tuple, Type, Union
from typing_extensions import Literal

import pandas as pd

from mlos_core.optimizers import (
    BaseOptimizer, OptimizerType, OptimizerFactory, SpaceAdapterType, DEFAULT_OPTIMIZER_TYPE
)

from mlos_bench.environments.status import Status
from mlos_bench.services.base_service import Service
from mlos_bench.tunables.tunable import TunableValue
from mlos_bench.tunables.tunable_groups import TunableGroups
from mlos_bench.optimizers.base_optimizer import Optimizer

from mlos_bench.optimizers.convert_configspace import (
    TunableValueKind,
    configspace_data_to_tunable_values,
    special_param_names,
)

_LOG = logging.getLogger(__name__)
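
# An illustrative (hypothetical) mlos_bench JSON config that selects this wrapper;
# the exact schema is defined by mlos_bench's config loader, so treat the key
# names below as assumptions for illustration only:
#
#   {
#     "class": "mlos_bench.optimizers.mlos_core_optimizer.MlosCoreOptimizer",
#     "config": {
#       "optimizer_type": "SMAC",       // a member of OptimizerType, e.g., "FLAML", "RANDOM"
#       "max_trials": 100,
#       "output_directory": "./smac_output"
#     }
#   }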


class MlosCoreOptimizer(Optimizer):
    """
    A wrapper class for the mlos_core optimizers.
    """

    def __init__(self,
                 tunables: TunableGroups,
                 config: dict,
                 global_config: Optional[dict] = None,
                 service: Optional[Service] = None):
        super().__init__(tunables, config, global_config, service)

        # Resolve the optimizer type by name (e.g., "SMAC");
        # fall back to DEFAULT_OPTIMIZER_TYPE if the config does not specify one.
        opt_type = getattr(OptimizerType, self._config.pop(
            'optimizer_type', DEFAULT_OPTIMIZER_TYPE.name))

        if opt_type == OptimizerType.SMAC:
            output_directory = self._config.get('output_directory')
            if output_directory is not None:
                # If output_directory is specified, turn it into an absolute path.
                self._config['output_directory'] = os.path.abspath(output_directory)
            else:
                _LOG.warning("SMAC optimizer output_directory was null. "
                             "SMAC will use a temporary directory.")

            # Make sure max_trials >= max_iterations.
            if 'max_trials' not in self._config:
                self._config['max_trials'] = self._max_iter
            assert int(self._config['max_trials']) >= self._max_iter, \
                f"max_trials {self._config.get('max_trials')} < max_iterations {self._max_iter}"

            if 'run_name' not in self._config and self.experiment_id:
                self._config['run_name'] = self.experiment_id

        space_adapter_type = self._config.pop('space_adapter_type', None)
        space_adapter_config = self._config.pop('space_adapter_config', {})

        if space_adapter_type is not None:
            space_adapter_type = getattr(SpaceAdapterType, space_adapter_type)

        self._opt: BaseOptimizer = OptimizerFactory.create(
            parameter_space=self.config_space,
            optimizer_type=opt_type,
            optimizer_kwargs=self._config,
            space_adapter_type=space_adapter_type,
            space_adapter_kwargs=space_adapter_config,
        )
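
    # Note: OptimizerFactory.create() returns some concrete mlos_core BaseOptimizer
    # (e.g., SmacOptimizer for OptimizerType.SMAC, or FlamlOptimizer for
    # OptimizerType.FLAML), optionally wrapped in the requested space adapter.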

    def __exit__(self, ex_type: Optional[Type[BaseException]],
                 ex_val: Optional[BaseException],
                 ex_tb: Optional[TracebackType]) -> Literal[False]:
        self._opt.cleanup()
        return super().__exit__(ex_type, ex_val, ex_tb)

    @property
    def name(self) -> str:
        return f"{self.__class__.__name__}:{self._opt.__class__.__name__}"
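
    # For example, when wrapping SMAC, the property above would yield
    # "MlosCoreOptimizer:SmacOptimizer".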

    def bulk_register(self,
                      configs: Sequence[dict],
                      scores: Sequence[Optional[Dict[str, TunableValue]]],
                      status: Optional[Sequence[Status]] = None) -> bool:
        if not super().bulk_register(configs, scores, status):
            return False
        df_configs = self._to_df(configs)  # Impute missing values, if necessary.
        df_scores = pd.Series(
            [self._extract_target(score) for score in scores],
            dtype=float) * self._opt_sign
        if status is not None:
            df_status = pd.Series(status)
            # Trials that completed but did not succeed get the worst possible score.
            df_scores[df_status != Status.SUCCEEDED] = float("inf")
            df_status_completed = df_status.apply(Status.is_completed)
            df_configs = df_configs[df_status_completed]
            df_scores = df_scores[df_status_completed]
        self._opt.register(df_configs, df_scores)
        if _LOG.isEnabledFor(logging.DEBUG):
            (score, _) = self.get_best_observation()
            _LOG.debug("Warm-up end: %s = %s", self.target, score)
        return True
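
    # A small worked example of the score handling above (illustrative values):
    # when maximizing, the base class is assumed to set _opt_sign = -1, so a raw
    # score of 0.9 is registered as -0.9 (mlos_core minimizes); a FAILED trial is
    # registered with a score of +inf, and trials that never completed (e.g.,
    # still RUNNING) are dropped from both df_configs and df_scores.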

    def _extract_target(self, scores: Optional[Dict[str, TunableValue]]) -> Optional[TunableValue]:
        return None if scores is None else scores[self._opt_target]

    def _to_df(self, configs: Sequence[Dict[str, TunableValue]]) -> pd.DataFrame:
        """
        Select from past trials only the columns required in this experiment and
        impute default values for the tunables that are missing in the dataframe.

        Parameters
        ----------
        configs : Sequence[dict]
            Sequence of dicts with past trials data.

        Returns
        -------
        df_configs : pd.DataFrame
            A dataframe with past trials data, with missing values imputed.
        """
        df_configs = pd.DataFrame(configs)
        tunables_names = list(self._tunables.get_param_values().keys())
        missing_cols = set(tunables_names).difference(df_configs.columns)
        for (tunable, _group) in self._tunables:
            if tunable.name in missing_cols:
                df_configs[tunable.name] = tunable.default
            else:
                df_configs.fillna({tunable.name: tunable.default}, inplace=True)
            # External data can have incorrect types (e.g., all strings).
            df_configs[tunable.name] = df_configs[tunable.name].astype(tunable.dtype)
            # Add columns for tunables with special values.
            if tunable.special:
                (special_name, type_name) = special_param_names(tunable.name)
                tunables_names += [special_name, type_name]
                is_special = df_configs[tunable.name].apply(tunable.special.__contains__)
                df_configs[type_name] = TunableValueKind.RANGE
                df_configs.loc[is_special, type_name] = TunableValueKind.SPECIAL
                if tunable.type == "int":
                    # Make the int column nullable:
                    df_configs[tunable.name] = df_configs[tunable.name].astype("Int64")
                df_configs[special_name] = df_configs[tunable.name]
                df_configs.loc[~is_special, special_name] = None
                df_configs.loc[is_special, tunable.name] = None
        # By default, hyperparameters in ConfigurationSpace are sorted by name:
        df_configs = df_configs[sorted(tunables_names)]
        _LOG.debug("Loaded configs:\n%s", df_configs)
        return df_configs
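
    # Illustrative example of the special-value handling above: suppose a tunable
    # "cost" has range [0, 500000] and special value -1. special_param_names("cost")
    # yields the names of two helper columns (shown here as "cost!special" and
    # "cost!type" -- the exact naming is up to convert_configspace). A row with
    # cost=250000 becomes (cost=250000, cost!special=None, cost!type=RANGE), while
    # a row with cost=-1 becomes (cost=None, cost!special=-1, cost!type=SPECIAL),
    # so that ConfigSpace sees the range and the special values as disjoint parameters.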

    def suggest(self) -> TunableGroups:
        tunables = super().suggest()
        if self._start_with_defaults:
            _LOG.info("Use default values for the first trial")
        df_config = self._opt.suggest(defaults=self._start_with_defaults)
        self._start_with_defaults = False
        _LOG.info("Iteration %d :: Suggest:\n%s", self._iter, df_config)
        return tunables.assign(
            configspace_data_to_tunable_values(df_config.loc[0].to_dict()))

    def register(self, tunables: TunableGroups, status: Status,
                 score: Optional[Union[float, dict]] = None) -> Optional[float]:
        score = super().register(tunables, status, score)  # With _opt_sign applied.
        if status.is_completed():
            df_config = self._to_df([tunables.get_param_values()])
            _LOG.debug("Score: %s Dataframe:\n%s", score, df_config)
            self._opt.register(df_config, pd.Series([score], dtype=float))
        return score

    def get_best_observation(self) -> Union[Tuple[float, TunableGroups], Tuple[None, None]]:
        df_config = self._opt.get_best_observation()
        if len(df_config) == 0:
            return (None, None)
        params = configspace_data_to_tunable_values(df_config.iloc[0].to_dict())
        _LOG.debug("Best observation: %s", params)
        score = params.pop("score")
        assert score is not None
        score = float(score) * self._opt_sign  # mlos_core always uses the `score` column.
        return (score, self._tunables.copy().assign(params))
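
# A minimal usage sketch (illustrative, not executable as-is: it assumes that
# `tunables`, `config`, and a `run_trial()` helper that executes one benchmark
# trial already exist; the loop guard not_converged() comes from the base Optimizer):
#
#   with MlosCoreOptimizer(tunables, config) as opt:
#       while opt.not_converged():
#           suggestion = opt.suggest()
#           (status, score) = run_trial(suggestion)
#           opt.register(suggestion, status, score)
#       (best_score, best_config) = opt.get_best_observation()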