Coverage for mlos_bench/mlos_bench/storage/base_experiment_data.py: 86%

49 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-20 00:44 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Base interface for accessing the stored benchmark experiment data. 

7 

8An experiment is a collection of trials that are run with a given set of scripts and 

9target system. 

10 

11Each trial is associated with a configuration (e.g., set of tunable parameters), but 

12multiple trials may use the same config (e.g., for repeat run variability analysis). 

13 

14See Also 

15-------- 

16mlos_bench.storage : 

17 The base storage module for mlos_bench, which includes some basic examples 

18 in the documentation. 

19ExperimentData.results_df : 

20 Retrieves a pandas DataFrame of the Experiment's trials' results data. 

21ExperimentData.trials : 

22 Retrieves a dictionary of the Experiment's trials' data. 

23ExperimentData.tunable_configs : 

24 Retrieves a dictionary of the Experiment's sampled configs data. 

25ExperimentData.tunable_config_trial_groups : 

26 Retrieves a dictionary of the Experiment's trials' data, grouped by shared 

27 tunable config. 

28mlos_bench.storage.base_trial_data.TrialData : 

29 Base interface for accessing the stored benchmark trial data. 

30""" 

31 

32from abc import ABCMeta, abstractmethod 

33from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple 

34 

35import pandas 

36 

37from mlos_bench.storage.base_tunable_config_data import TunableConfigData 

38from mlos_bench.util import strtobool 

39 

40if TYPE_CHECKING: 

41 from mlos_bench.storage.base_trial_data import TrialData 

42 from mlos_bench.storage.base_tunable_config_trial_group_data import ( 

43 TunableConfigTrialGroupData, 

44 ) 

45 

46 

class ExperimentData(metaclass=ABCMeta):
    """
    Abstract accessor for the benchmark data stored for a single experiment.

    An experiment groups together a set of trials that are run with a given set of
    scripts and mlos_bench configuration files.
    """

    RESULT_COLUMN_PREFIX = "result."
    """
    Prefix of the columns in :py:attr:`.ExperimentData.results_df` that hold trial
    results metrics.

    For example, a result metric named "time" appears in the "result.time" column.
    """

    CONFIG_COLUMN_PREFIX = "config."
    """
    Prefix of the columns in :py:attr:`.ExperimentData.results_df` that hold trial
    config parameters.

    For example, a config parameter named "param1" appears in the "config.param1"
    column.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        experiment_id: str,
        description: str,
        root_env_config: str,
        git_repo: str,
        git_commit: str,
    ):
        """
        Record the identifying attributes of the experiment.

        All arguments are keyword-only and are stored as given.

        Parameters
        ----------
        experiment_id : str
            ID of the experiment.
        description : str
            Description of the experiment.
        root_env_config : str
            Root environment configuration.
        git_repo : str
            Second element of the tuple returned by :py:attr:`.root_env_config`.
        git_commit : str
            Third element of the tuple returned by :py:attr:`.root_env_config`.
        """
        # The three values below are only ever exposed together, as a tuple.
        self._root_env_config = root_env_config
        self._git_repo = git_repo
        self._git_commit = git_commit
        self._experiment_id = experiment_id
        self._description = description

    @property
    def experiment_id(self) -> str:
        """ID of the experiment."""
        return self._experiment_id

    @property
    def description(self) -> str:
        """Description of the experiment."""
        return self._description

    @property
    def root_env_config(self) -> Tuple[str, str, str]:
        """
        Root environment configuration.

        Returns
        -------
        (root_env_config, git_repo, git_commit) : Tuple[str, str, str]
            The values given at construction time, in that order.
        """
        return self._root_env_config, self._git_repo, self._git_commit

    def __repr__(self) -> str:
        return f"Experiment :: {self._experiment_id}: '{self._description}'"

    @property
    @abstractmethod
    def objectives(self) -> Dict[str, Literal["min", "max"]]:
        """
        Retrieve the experiment's objectives data from the storage.

        Returns
        -------
        objectives : Dict[str, Literal["min", "max"]]
            Maps each objective name (optimization target) to its direction,
            either "min" or "max".
        """

    @property
    @abstractmethod
    def trials(self) -> Dict[int, "TrialData"]:
        """
        Retrieve the experiment's trials' data from the storage.

        Returns
        -------
        trials : Dict[int, TrialData]
            The trials' data, keyed by trial id.
        """

    @property
    @abstractmethod
    def tunable_configs(self) -> Dict[int, TunableConfigData]:
        """
        Retrieve the experiment's (tunable) configs' data from the storage.

        Returns
        -------
        tunable_configs : Dict[int, TunableConfigData]
            The configs' data, keyed by (tunable) config id.
        """

    @property
    @abstractmethod
    def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
        """
        Retrieve the experiment's (tunable) config trial group data from the storage.

        Returns
        -------
        tunable_config_trial_groups : Dict[int, TunableConfigTrialGroupData]
            The trials' data, keyed by (tunable) config id.
        """

    @property
    def default_tunable_config_id(self) -> Optional[int]:
        """
        Retrieve the (tunable) config id of the default tunable values for this
        experiment.

        Note: this is by *default* the first trial executed for this experiment.
        However, it is currently possible that the user changed the tunables config
        in between resumptions of an experiment.

        Returns
        -------
        Optional[int]
            The config id of the lowest-numbered trial whose metadata marks it as
            "is_defaults"; otherwise the config id of the lowest-numbered trial;
            or None when the experiment has no trials.
        """
        # Note: this implementation is quite inefficient and may be better
        # reimplemented by subclasses.
        ordered_trials = list(self.trials.items())
        if not ordered_trials:
            return None
        # Walk the trials in ascending trial id order.
        ordered_trials.sort()
        for _trial_id, candidate in ordered_trials:
            # The flag may be stored as text, so normalize it through strtobool.
            is_defaults = candidate.metadata_dict.get("is_defaults", False)
            if strtobool(str(is_defaults)):
                chosen = candidate
                break
        else:
            # Nothing was marked as defaults: fall back to the min trial_id.
            chosen = ordered_trials[0][1]
        return chosen.tunable_config_id

    @property
    @abstractmethod
    def results_df(self) -> pandas.DataFrame:
        """
        Retrieve all experimental results as a single DataFrame.

        Returns
        -------
        results : pandas.DataFrame
            A DataFrame with configurations and results from all trials of the
            experiment. Has columns
            [trial_id, tunable_config_id, tunable_config_trial_group_id, ts_start, ts_end, status]
            followed by tunable config parameters (prefixed with "config.") and
            trial results (prefixed with "result."). The latter can be NULLs if the
            trial was not successful.

        See Also
        --------
        :py:attr:`.ExperimentData.CONFIG_COLUMN_PREFIX`
        :py:attr:`.ExperimentData.RESULT_COLUMN_PREFIX`
        """