Coverage for mlos_core/mlos_core/data_classes.py: 93%
148 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-20 00:44 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-20 00:44 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Data classes for ``mlos_core`` used to pass around configurations, observations, and
7suggestions.
9``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame`
10s and :external:py:class:`~pandas.Series` to represent configurations and scores and
11context (information about where the configuration was evaluated).
13These modules encapsulate tuples of those for easier passing around and manipulation.
14"""
15from typing import Any, Iterable, Iterator, Optional
17import pandas as pd
18from ConfigSpace import Configuration, ConfigurationSpace
20from mlos_core.util import compare_optional_dataframe, compare_optional_series
class Observation:
    """
    A single observation of a configuration.

    Bundles the evaluated configuration with the scores it produced and, optionally,
    the context and metadata associated with that evaluation.
    """

    def __init__(
        self,
        *,
        config: pd.Series,
        score: Optional[pd.Series] = None,
        context: Optional[pd.Series] = None,
        metadata: Optional[pd.Series] = None,
    ):
        """
        Creates a new Observation object.

        Parameters
        ----------
        config : pandas.Series
            The configuration observed.
        score : Optional[pandas.Series]
            The score metrics observed. When omitted, an empty Series is used.
        context : Optional[pandas.Series]
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata: Optional[pandas.Series]
            The metadata in which the configuration was evaluated
        """
        self._config = config
        # Build the empty default per instance: a ``pd.Series()`` in the signature
        # would be evaluated once and shared by every Observation created without
        # a score.
        self._score = pd.Series() if score is None else score
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Observation."""
        return self._config.copy()

    @property
    def score(self) -> pd.Series:
        """Gets (a copy of) the score of the Observation."""
        return self._score.copy()

    @property
    def context(self) -> Optional[pd.Series]:
        """Gets (a copy of) the context of the Observation, or None if unset."""
        return None if self._context is None else self._context.copy()

    @property
    def metadata(self) -> Optional[pd.Series]:
        """Gets (a copy of) the metadata of the Observation, or None if unset."""
        return None if self._metadata is None else self._metadata.copy()

    def to_suggestion(self) -> "Suggestion":
        """
        Converts the observation to a suggestion.

        The score is dropped; config, context and metadata are passed on as copies.

        Returns
        -------
        Suggestion
            The suggestion.
        """
        return Suggestion(
            config=self.config,
            context=self.context,
            metadata=self.metadata,
        )

    def __repr__(self) -> str:
        return (
            f"Observation(config={self._config}, score={self._score}, "
            f"context={self._context}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        # Objects of any other type never compare equal.
        if not isinstance(other, Observation):
            return False

        if not self._config.equals(other._config):
            return False
        if not self._score.equals(other._score):
            return False
        # Context and metadata may be None on either side, hence the helper.
        if not compare_optional_series(self._context, other._context):
            return False
        if not compare_optional_series(self._metadata, other._metadata):
            return False

        return True

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)
class Observations:
    """
    A set of observations of configurations and their scores.

    Stored as row-aligned dataframes (configs, scores and optionally contexts and
    metadata) whose index is reset to 0..N-1 on construction and after each append.
    """

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        configs: Optional[pd.DataFrame] = None,
        scores: Optional[pd.DataFrame] = None,
        contexts: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
        observations: Optional[Iterable["Observation"]] = None,
    ):
        """
        Creates a new Observations object.

        Can accept either a set of Observations or a collection of aligned config and
        score (and optionally context and metadata) dataframes.

        If a non-empty ``observations`` iterable is provided it takes precedence: all
        four dataframes are rebuilt from it and the dataframe arguments are ignored.

        Parameters
        ----------
        configs : Optional[pandas.DataFrame]
            Pandas dataframe containing configurations. Column names are the parameter
            names. Defaults to an empty dataframe.
        scores : Optional[pandas.DataFrame]
            The score metrics observed in a dataframe. Defaults to an empty dataframe.
        contexts : Optional[pandas.DataFrame]
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata: Optional[pandas.DataFrame]
            The metadata in which the configuration was evaluated
            Not Yet Implemented.
        observations : Optional[Iterable[Observation]]
            Individual observations to build the dataframes from. Any iterable is
            accepted, including one-shot generators.
        """
        # Materialise once: the items are traversed several times below, which
        # would silently drop or exhaust the elements of a generator.
        observations = [] if observations is None else list(observations)
        if observations:
            configs = pd.concat([obs.config.to_frame().T for obs in observations])
            scores = pd.concat([obs.score.to_frame().T for obs in observations])

            # Contexts/metadata are only kept when *every* observation has them.
            obs_contexts = [obs.context for obs in observations]
            if any(ctx is None for ctx in obs_contexts):
                contexts = None
            else:
                contexts = pd.concat(
                    [ctx.to_frame().T for ctx in obs_contexts]  # type: ignore[union-attr]
                )
            obs_metadata = [obs.metadata for obs in observations]
            if any(meta is None for meta in obs_metadata):
                metadata = None
            else:
                metadata = pd.concat(
                    [meta.to_frame().T for meta in obs_metadata]  # type: ignore[union-attr]
                )

        if configs is None:
            configs = pd.DataFrame()
        if scores is None:
            scores = pd.DataFrame()

        assert len(configs.index) == len(
            scores.index
        ), "config and score must have the same length"
        if contexts is not None:
            assert len(configs.index) == len(
                contexts.index
            ), "config and context must have the same length"
        if metadata is not None:
            assert len(configs.index) == len(
                metadata.index
            ), "config and metadata must have the same length"

        # reset_index returns new frames, so the caller's dataframes are not aliased.
        self._configs = configs.reset_index(drop=True)
        self._scores = scores.reset_index(drop=True)
        self._contexts = None if contexts is None else contexts.reset_index(drop=True)
        self._metadata = None if metadata is None else metadata.reset_index(drop=True)

    @property
    def configs(self) -> pd.DataFrame:
        """Gets a copy of the configs of the Observations."""
        return self._configs.copy()

    @property
    def scores(self) -> pd.DataFrame:
        """Gets a copy of the scores of the Observations."""
        return self._scores.copy()

    @property
    def contexts(self) -> Optional[pd.DataFrame]:
        """Gets a copy of the contexts of the Observations, or None if unset."""
        return None if self._contexts is None else self._contexts.copy()

    @property
    def metadata(self) -> Optional[pd.DataFrame]:
        """Gets a copy of the metadata of the Observations, or None if unset."""
        return None if self._metadata is None else self._metadata.copy()

    def filter_by_index(self, index: pd.Index) -> "Observations":
        """
        Filters the observations by the given indices.

        Parameters
        ----------
        index : pandas.Index
            The indices to filter by.

        Returns
        -------
        Observations
            A new Observations object holding only the selected rows
            (re-indexed from zero by the constructor).
        """
        return Observations(
            configs=self._configs.loc[index].copy(),
            scores=self._scores.loc[index].copy(),
            contexts=None if self._contexts is None else self._contexts.loc[index].copy(),
            metadata=None if self._metadata is None else self._metadata.loc[index].copy(),
        )

    def append(self, observation: "Observation") -> None:
        """
        Appends the given observation to this set of observations (in place).

        Parameters
        ----------
        observation : Observation
            The observation to append. Unless this set is still empty, it must
            carry a context (respectively metadata) if and only if the existing
            observations do; otherwise an AssertionError is raised and this
            object is left unchanged.
        """
        # Each Series becomes a single-row dataframe.
        config = observation.config.to_frame().T
        score = observation.score.to_frame().T
        obs_context = observation.context
        obs_metadata = observation.metadata
        context = None if obs_context is None else obs_context.to_frame().T
        metadata = None if obs_metadata is None else obs_metadata.to_frame().T

        if len(self._configs.index) == 0:
            # First row: adopt it as-is, but normalise the index like __init__
            # does, so the Series names do not leak into the row labels.
            self._configs = config.reset_index(drop=True)
            self._scores = score.reset_index(drop=True)
            self._contexts = None if context is None else context.reset_index(drop=True)
            self._metadata = None if metadata is None else metadata.reset_index(drop=True)
            return

        # Validate everything before mutating so that a rejected observation
        # cannot leave the dataframes with mismatched lengths.
        if self._contexts is not None:
            assert context is not None, (
                "context of appending observation must not be null "
                "if context of prior observation is not null"
            )
        else:
            assert context is None, (
                "context of appending observation must be null "
                "if context of prior observation is null"
            )
        if self._metadata is not None:
            assert metadata is not None, (
                "metadata of appending observation must not be null "
                "if metadata of prior observation is not null"
            )
        else:
            assert metadata is None, (
                "metadata of appending observation must be null "
                "if metadata of prior observation is null"
            )

        # Every frame gains exactly one row and is re-indexed from zero, which
        # keeps all of them aligned.
        self._configs = pd.concat([self._configs, config]).reset_index(drop=True)
        self._scores = pd.concat([self._scores, score]).reset_index(drop=True)
        if self._contexts is not None:
            self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True)
        if self._metadata is not None:
            self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True)

    def __len__(self) -> int:
        return len(self._configs.index)

    def __iter__(self) -> Iterator["Observation"]:
        # Yields one Observation per row, built from the matching row of each frame.
        for idx in self._configs.index:
            yield Observation(
                config=self._configs.loc[idx],
                score=self._scores.loc[idx],
                context=None if self._contexts is None else self._contexts.loc[idx],
                metadata=None if self._metadata is None else self._metadata.loc[idx],
            )

    def __repr__(self) -> str:
        return (
            f"Observations(configs={self._configs}, score={self._scores}, "
            f"contexts={self._contexts}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        # Objects of any other type never compare equal.
        if not isinstance(other, Observations):
            return False

        if not self._configs.equals(other._configs):
            return False
        if not self._scores.equals(other._scores):
            return False
        # Contexts and metadata may be None on either side, hence the helper.
        if not compare_optional_dataframe(self._contexts, other._contexts):
            return False
        if not compare_optional_dataframe(self._metadata, other._metadata):
            return False

        return True

    # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq
    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)
class Suggestion:
    """
    A single suggestion for a configuration.

    A Suggestion is an Observation that has not yet been scored. Evaluating the
    Suggestion and calling `complete(scores)` can convert it to an Observation.
    """

    def __init__(
        self,
        *,
        config: pd.Series,
        context: Optional[pd.Series] = None,
        metadata: Optional[pd.Series] = None,
    ):
        """
        Creates a new Suggestion.

        Parameters
        ----------
        config : pandas.Series
            The configuration suggested.
        context : Optional[pandas.Series]
            The context for this suggestion, by default None
        metadata : Optional[pandas.Series]
            Any metadata provided by the underlying optimizer, by default None
        """
        self._config = config
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Suggestion."""
        return self._config.copy()

    @property
    def context(self) -> Optional[pd.Series]:
        """Gets (a copy of) the context of the Suggestion, or None if unset."""
        return None if self._context is None else self._context.copy()

    @property
    def metadata(self) -> Optional[pd.Series]:
        """Gets (a copy of) the metadata of the Suggestion, or None if unset."""
        return None if self._metadata is None else self._metadata.copy()

    def complete(self, score: pd.Series) -> "Observation":
        """
        Completes the Suggestion by adding a score to turn it into an Observation.

        Parameters
        ----------
        score : pandas.Series
            The score metrics observed.

        Returns
        -------
        Observation
            The observation of the suggestion, carrying copies of this
            suggestion's config, context and metadata.
        """
        return Observation(
            config=self.config,
            score=score,
            context=self.context,
            metadata=self.metadata,
        )

    def to_configspace_config(self, space: "ConfigurationSpace") -> "Configuration":
        """
        Convert this suggestion's config into a ConfigSpace Configuration.

        Parameters
        ----------
        space : ConfigSpace.ConfigurationSpace
            The ConfigurationSpace the resulting Configuration is created in.

        Returns
        -------
        ConfigSpace.Configuration
            The output Configuration.
        """
        # Entries that are NaN in the config Series are dropped before the
        # values are handed to ConfigSpace.
        return Configuration(space, values=self._config.dropna().to_dict())

    def __repr__(self) -> str:
        return (
            f"Suggestion(config={self._config}, context={self._context}, "
            f"metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        # Objects of any other type never compare equal.
        if not isinstance(other, Suggestion):
            return False

        if not self._config.equals(other._config):
            return False
        # Context and metadata may be None on either side, hence the helper.
        if not compare_optional_series(self._context, other._context):
            return False
        if not compare_optional_series(self._metadata, other._metadata):
            return False

        return True

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)