Coverage for mlos_bench/mlos_bench/storage/base_experiment_data.py: 85%
47 statements
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Base interface for accessing the stored benchmark experiment data.
8An experiment is a collection of trials that are run with a given set of scripts and
9target system.
11Each trial is associated with a configuration (e.g., set of tunable parameters), but
12multiple trials may use the same config (e.g., for repeat run variability analysis).
14See Also
15--------
16ExperimentData.results_df :
17 Retrieves a pandas DataFrame of the Experiment's trials' results data.
18ExperimentData.trials :
19 Retrieves a dictionary of the Experiment's trials' data.
20ExperimentData.tunable_configs :
21 Retrieves a dictionary of the Experiment's sampled configs data.
22ExperimentData.tunable_config_trial_groups :
23 Retrieves a dictionary of the Experiment's trials' data, grouped by shared
24 tunable config.
25mlos_bench.storage.base_trial_data.TrialData :
26 Base interface for accessing the stored benchmark trial data.
27"""
29from abc import ABCMeta, abstractmethod
30from typing import TYPE_CHECKING, Dict, Literal, Optional, Tuple
32import pandas
34from mlos_bench.storage.base_tunable_config_data import TunableConfigData
35from mlos_bench.util import strtobool
37if TYPE_CHECKING:
38 from mlos_bench.storage.base_trial_data import TrialData
39 from mlos_bench.storage.base_tunable_config_trial_group_data import (
40 TunableConfigTrialGroupData,
41 )
class ExperimentData(metaclass=ABCMeta):
    """
    Abstract, read-only view of a single stored benchmark experiment.

    An experiment is the set of trials that were run with one given set of scripts
    and mlos_bench configuration files. Concrete subclasses must supply the
    abstract properties declared below.
    """

    # Column name prefixes; see the `results_df` property documentation.
    RESULT_COLUMN_PREFIX = "result."
    CONFIG_COLUMN_PREFIX = "config."

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        experiment_id: str,
        description: str,
        root_env_config: str,
        git_repo: str,
        git_commit: str,
    ):
        """
        Record the identifying attributes of the experiment.

        Parameters
        ----------
        experiment_id : str
            ID of the experiment.
        description : str
            Description of the experiment.
        root_env_config : str
            Root environment configuration of the experiment.
        git_repo : str
            Git repo reported alongside the root environment configuration.
        git_commit : str
            Git commit reported alongside the root environment configuration.
        """
        # Identity of the experiment.
        self._experiment_id = experiment_id
        self._description = description
        # The three values below are exposed together by `root_env_config`.
        self._git_commit = git_commit
        self._git_repo = git_repo
        self._root_env_config = root_env_config

    @property
    def experiment_id(self) -> str:
        """The experiment's ID."""
        return self._experiment_id

    @property
    def description(self) -> str:
        """The experiment's description."""
        return self._description

    @property
    def root_env_config(self) -> Tuple[str, str, str]:
        """
        Root environment configuration of the experiment.

        Returns
        -------
        root_env_config : Tuple[str, str, str]
            The triple (root_env_config, git_repo, git_commit) describing the root
            environment.
        """
        config, repo, commit = self._root_env_config, self._git_repo, self._git_commit
        return (config, repo, commit)

    def __repr__(self) -> str:
        return f"Experiment :: {self._experiment_id}: '{self._description}'"

    @property
    @abstractmethod
    def objectives(self) -> Dict[str, Literal["min", "max"]]:
        """
        Fetch the objectives of the experiment from the storage.

        Returns
        -------
        objectives : Dict[str, Literal["min", "max"]]
            Mapping of each objective name (optimization target) to its direction
            (i.e., "min" or "max").
        """

    @property
    @abstractmethod
    def trials(self) -> Dict[int, "TrialData"]:
        """
        Fetch the data of the experiment's trials from the storage.

        Returns
        -------
        trials : Dict[int, TrialData]
            Mapping of trial id to that trial's data.
        """

    @property
    @abstractmethod
    def tunable_configs(self) -> Dict[int, TunableConfigData]:
        """
        Fetch the data of the experiment's (tunable) configs from the storage.

        Returns
        -------
        tunable_configs : Dict[int, TunableConfigData]
            Mapping of (tunable) config id to that config's data.
        """

    @property
    @abstractmethod
    def tunable_config_trial_groups(self) -> Dict[int, "TunableConfigTrialGroupData"]:
        """
        Fetch the experiment's (tunable) config trial group data from the storage.

        Returns
        -------
        tunable_config_trial_groups : Dict[int, TunableConfigTrialGroupData]
            Mapping of (tunable) config id to the data of the trials that share
            that config.
        """

    @property
    def default_tunable_config_id(self) -> Optional[int]:
        """
        Look up the (tunable) config id holding the default tunable values of this
        experiment.

        Note: by *default* this is the config of the first trial executed for the
        experiment. However, it is currently possible for the user to have changed
        the tunables config in between resumptions of an experiment.

        Returns
        -------
        Optional[int]
            The config id of the first trial (in trial id order) whose metadata
            marks it as "is_defaults"; otherwise the config id of the trial with
            the smallest trial id; or None when the experiment has no trials.
        """
        # Note: this implementation is quite inefficient and may be better
        # reimplemented by subclasses.

        ordered_trials = sorted(self.trials.items())
        if ordered_trials:
            # Prefer the first trial that was flagged as "defaults" in its
            # metadata when it was instantiated (evaluated lazily, in id order).
            flagged_trial = next(
                (
                    candidate
                    for _trial_id, candidate in ordered_trials
                    if strtobool(str(candidate.metadata_dict.get("is_defaults", False)))
                ),
                None,
            )
            if flagged_trial is not None:
                return flagged_trial.tunable_config_id
            # Fallback: the trial with the minimum trial_id.
            _first_id, first_trial = ordered_trials[0]
            return first_trial.tunable_config_id
        return None

    @property
    @abstractmethod
    def results_df(self) -> pandas.DataFrame:
        """
        Fetch every result of the experiment as one DataFrame.

        Returns
        -------
        results : pandas.DataFrame
            A DataFrame holding the configurations and results of all trials of the
            experiment. Its columns are
            [trial_id, tunable_config_id, tunable_config_trial_group_id,
            ts_start, ts_end, status]
            followed by the tunable config parameters (prefixed with "config.") and
            the trial results (prefixed with "result."). The latter can be NULLs if
            the trial was not successful.
        """