Coverage for mlos_bench/mlos_bench/storage/base_experiment_data.py: 85%

48 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-06 00:35 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Base interface for accessing the stored benchmark experiment data. 

7""" 

8 

9from abc import ABCMeta, abstractmethod 

10from distutils.util import strtobool # pylint: disable=deprecated-module 

11from typing import Dict, Literal, Optional, Tuple, TYPE_CHECKING 

12 

13import pandas 

14 

15from mlos_bench.storage.base_tunable_config_data import TunableConfigData 

16 

17if TYPE_CHECKING: 

18 from mlos_bench.storage.base_trial_data import TrialData 

19 from mlos_bench.storage.base_tunable_config_trial_group_data import TunableConfigTrialGroupData 

20 

21 

def _strtobool(value: str) -> bool:
    """
    Convert a string representation of truth to a boolean.

    Local stand-in for ``distutils.util.strtobool``: the ``distutils`` package
    is deprecated (PEP 632) and no longer ships with Python 3.12+.
    The accepted spellings and the error behavior mirror the original helper.

    Parameters
    ----------
    value : str
        One of y/yes/t/true/on/1 or n/no/f/false/off/0 (case-insensitive).

    Returns
    -------
    bool
        True for the "truthy" spellings, False for the "falsy" ones.

    Raises
    ------
    ValueError
        If the value is not one of the recognized spellings.
    """
    normalized = value.lower()
    if normalized in ("y", "yes", "t", "true", "on", "1"):
        return True
    if normalized in ("n", "no", "f", "false", "off", "0"):
        return False
    raise ValueError(f"invalid truth value {value!r}")


class ExperimentData(metaclass=ABCMeta):
    """
    Base interface for accessing the stored experiment benchmark data.

    An experiment groups together a set of trials that are run with a given set of
    scripts and mlos_bench configuration files.
    """

    RESULT_COLUMN_PREFIX = "result."
    CONFIG_COLUMN_PREFIX = "config."

    def __init__(self, *,
                 experiment_id: str,
                 description: str,
                 root_env_config: str,
                 git_repo: str,
                 git_commit: str):
        self._experiment_id = experiment_id
        self._description = description
        self._root_env_config = root_env_config
        self._git_repo = git_repo
        self._git_commit = git_commit

    @property
    def experiment_id(self) -> str:
        """
        ID of the experiment.
        """
        return self._experiment_id

    @property
    def description(self) -> str:
        """
        Description of the experiment.
        """
        return self._description

    @property
    def root_env_config(self) -> Tuple[str, str, str]:
        """
        Root environment configuration.

        Returns
        -------
        root_env_config : Tuple[str, str, str]
            A tuple of (root_env_config, git_repo, git_commit) for the root environment.
        """
        return (self._root_env_config, self._git_repo, self._git_commit)

    def __repr__(self) -> str:
        return f"Experiment :: {self._experiment_id}: '{self._description}'"

    @property
    @abstractmethod
    def objectives(self) -> Dict[str, Literal["min", "max"]]:
        """
        Retrieve the experiment's objectives data from the storage.

        Returns
        -------
        objectives : Dict[str, objective]
            A dictionary of the experiment's objective names (optimization_targets)
            and their directions (e.g., min or max).
        """

    @property
    @abstractmethod
    def trials(self) -> Dict[int, "TrialData"]:
        """
        Retrieve the experiment's trials' data from the storage.

        Returns
        -------
        trials : Dict[int, TrialData]
            A dictionary of the trials' data, keyed by trial id.
        """

    @property
    @abstractmethod
    def tunable_configs(self) -> Dict[int, TunableConfigData]:
        """
        Retrieve the experiment's (tunable) configs' data from the storage.

        Returns
        -------
        tunable_configs : Dict[int, TunableConfigData]
            A dictionary of the configs' data, keyed by (tunable) config id.
        """

    @property
    @abstractmethod
    def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
        """
        Retrieve the Experiment's (Tunable) Config Trial Group data from the storage.

        Returns
        -------
        tunable_config_trial_groups : Dict[int, TunableConfigTrialGroupData]
            A dictionary of the trial groups' data, keyed by (tunable) config id.
        """

    @property
    def default_tunable_config_id(self) -> Optional[int]:
        """
        Retrieves the (tunable) config id for the default tunable values for this experiment.

        Note: this is by *default* the first trial executed for this experiment.
        However, it is currently possible that the user changed the tunables config
        in between resumptions of an experiment.

        Returns
        -------
        Optional[int]
            The (tunable) config id of the first trial (in trial id order) whose
            metadata marks it as "is_defaults", else the config id of the trial
            with the smallest trial id, or None if the experiment has no trials.

        Raises
        ------
        ValueError
            If a trial's "is_defaults" metadata value is not a recognized
            boolean spelling.
        """
        # Note: this implementation is quite inefficient and may be better
        # reimplemented by subclasses.

        # Fetch once: `trials` is an abstract property that subclasses may
        # implement with a storage round-trip.
        trials = self.trials
        if not trials:
            return None
        trial_ids = sorted(trials)
        # Check to see if we included it in trial metadata.
        for trial_id in trial_ids:
            trial = trials[trial_id]
            # Take the first config id marked as "defaults" when it was instantiated.
            # The metadata value may be stored as a bool or as a string, hence str().
            if _strtobool(str(trial.metadata_dict.get('is_defaults', False))):
                return trial.tunable_config_id
        # Fallback (min trial_id)
        return trials[trial_ids[0]].tunable_config_id

    @property
    @abstractmethod
    def results_df(self) -> pandas.DataFrame:
        """
        Retrieve all experimental results as a single DataFrame.

        Returns
        -------
        results : pandas.DataFrame
            A DataFrame with configurations and results from all trials of the experiment.
            Has columns [trial_id, tunable_config_id, tunable_config_trial_group_id,
            ts_start, ts_end, status] followed by tunable config parameters
            (prefixed with "config.") and trial results (prefixed with "result.").
            The latter can be NULLs if the trial was not successful.
        """