Coverage for mlos_core/mlos_core/data_classes.py: 93%
157 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-14 00:55 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-14 00:55 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Data classes for ``mlos_core`` used to pass around configurations, observations, and
7suggestions.
9``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame`
10s and :external:py:class:`~pandas.Series` to represent configurations and scores and
11context (information about where the configuration was evaluated).
13These modules encapsulate tuples of those for easier passing around and manipulation.
14"""
15from collections.abc import Iterable, Iterator
16from typing import Any
18import pandas as pd
19from ConfigSpace import Configuration, ConfigurationSpace
21from mlos_core.util import compare_optional_dataframe, compare_optional_series
24class Observation:
25 """A single observation of a configuration."""
27 def __init__(
28 self,
29 *,
30 config: pd.Series,
31 score: pd.Series = pd.Series(),
32 context: pd.Series | None = None,
33 metadata: pd.Series | None = None,
34 ):
35 """
36 Creates a new Observation object.
38 Parameters
39 ----------
40 config : pandas.Series
41 The configuration observed.
42 score : pandas.Series
43 The score metrics observed.
44 context : pandas.Series | None
45 The context in which the configuration was evaluated.
46 Not Yet Implemented.
47 metadata: pandas.Series | None
48 The metadata in which the configuration was evaluated
49 """
50 self._config = config
51 self._score = score
52 self._context = context
53 self._metadata = metadata
55 @property
56 def config(self) -> pd.Series:
57 """Gets (a copy of) the config of the Observation."""
58 return self._config.copy()
60 @property
61 def score(self) -> pd.Series:
62 """Gets (a copy of) the score of the Observation."""
63 return self._score.copy()
65 @property
66 def context(self) -> pd.Series | None:
67 """Gets (a copy of) the context of the Observation."""
68 return self._context.copy() if self._context is not None else None
70 @property
71 def metadata(self) -> pd.Series | None:
72 """Gets (a copy of) the metadata of the Observation."""
73 return self._metadata.copy() if self._metadata is not None else None
75 def to_suggestion(self) -> "Suggestion":
76 """
77 Converts the observation to a suggestion.
79 Returns
80 -------
81 Suggestion
82 The suggestion.
83 """
84 return Suggestion(
85 config=self.config,
86 context=self.context,
87 metadata=self.metadata,
88 )
90 def __repr__(self) -> str:
91 return (
92 f"Observation(config={self._config}, score={self._score}, "
93 f"context={self._context}, metadata={self._metadata})"
94 )
96 def __eq__(self, other: Any) -> bool:
97 if not isinstance(other, Observation):
98 return False
100 if not self._config.equals(other._config):
101 return False
102 if not self._score.equals(other._score):
103 return False
104 if not compare_optional_series(self._context, other._context):
105 return False
106 if not compare_optional_series(self._metadata, other._metadata):
107 return False
109 return True
111 def __ne__(self, other: Any) -> bool:
112 return not self.__eq__(other)
115class Observations:
116 """A set of observations of a configuration scores."""
118 def __init__( # pylint: disable=too-many-arguments
119 self,
120 *,
121 configs: pd.DataFrame = pd.DataFrame(),
122 scores: pd.DataFrame = pd.DataFrame(),
123 contexts: pd.DataFrame | None = None,
124 metadata: pd.DataFrame | None = None,
125 observations: Iterable[Observation] | None = None,
126 ):
127 """
128 Creates a new Observation object.
130 Can accept either a set of Observations or a collection of aligned config and
131 score (and optionally context) dataframes.
133 If both are provided the two sets will be merged.
135 Parameters
136 ----------
137 configs : pandas.DataFrame
138 Pandas dataframe containing configurations. Column names are the parameter names.
139 scores : pandas.DataFrame
140 The score metrics observed in a dataframe.
141 contexts : pandas.DataFrame | None
142 The context in which the configuration was evaluated.
143 Not Yet Implemented.
144 metadata: pandas.DataFrame | None
145 The metadata in which the configuration was evaluated
146 Not Yet Implemented.
147 """
148 if observations is None:
149 observations = []
150 if any(observations):
151 configs = pd.concat([obs.config.to_frame().T for obs in observations])
152 scores = pd.concat([obs.score.to_frame().T for obs in observations])
154 if sum(obs.context is None for obs in observations) == 0:
155 contexts = pd.concat(
156 [obs.context.to_frame().T for obs in observations] # type: ignore[union-attr]
157 )
158 else:
159 contexts = None
160 if sum(obs.metadata is None for obs in observations) == 0:
161 metadata = pd.concat(
162 [obs.metadata.to_frame().T for obs in observations] # type: ignore[union-attr]
163 )
164 else:
165 metadata = None
166 assert len(configs.index) == len(
167 scores.index
168 ), "config and score must have the same length"
169 if contexts is not None:
170 assert len(configs.index) == len(
171 contexts.index
172 ), "config and context must have the same length"
173 if metadata is not None:
174 assert len(configs.index) == len(
175 metadata.index
176 ), "config and metadata must have the same length"
177 self._configs = configs.reset_index(drop=True)
178 self._scores = scores.reset_index(drop=True)
179 self._contexts = None if contexts is None else contexts.reset_index(drop=True)
180 self._metadata = None if metadata is None else metadata.reset_index(drop=True)
182 @property
183 def configs(self) -> pd.DataFrame:
184 """Gets a copy of the configs of the Observations."""
185 return self._configs.copy()
187 @property
188 def scores(self) -> pd.DataFrame:
189 """Gets a copy of the scores of the Observations."""
190 return self._scores.copy()
192 @property
193 def contexts(self) -> pd.DataFrame | None:
194 """Gets a copy of the contexts of the Observations."""
195 return self._contexts.copy() if self._contexts is not None else None
197 @property
198 def metadata(self) -> pd.DataFrame | None:
199 """Gets a copy of the metadata of the Observations."""
200 return self._metadata.copy() if self._metadata is not None else None
202 def filter_by_index(self, index: pd.Index) -> "Observations":
203 """
204 Filters the observation by the given indices.
206 Parameters
207 ----------
208 index : pandas.Index
209 The indices to filter by.
211 Returns
212 -------
213 Observation
214 The filtered observation.
215 """
216 return Observations(
217 configs=self._configs.loc[index].copy(),
218 scores=self._scores.loc[index].copy(),
219 contexts=None if self._contexts is None else self._contexts.loc[index].copy(),
220 metadata=None if self._metadata is None else self._metadata.loc[index].copy(),
221 )
223 def append(self, observation: Observation) -> None:
224 """
225 Appends the given observation to this observation.
227 Parameters
228 ----------
229 observation : Observation
230 The observation to append.
231 """
232 config = observation.config.to_frame().T
233 score = observation.score.to_frame().T
234 context = None if observation.context is None else observation.context.to_frame().T
235 metadata = None if observation.metadata is None else observation.metadata.to_frame().T
236 if len(self._configs.index) == 0:
237 self._configs = config
238 self._scores = score
239 self._contexts = context
240 self._metadata = metadata
241 assert set(self.configs.index) == set(
242 self.scores.index
243 ), "config and score must have the same index"
244 return
246 self._configs = pd.concat([self._configs, config]).reset_index(drop=True)
247 self._scores = pd.concat([self._scores, score]).reset_index(drop=True)
248 assert set(self.configs.index) == set(
249 self.scores.index
250 ), "config and score must have the same index"
252 if self._contexts is not None:
253 assert context is not None, (
254 "context of appending observation must not be null "
255 "if context of prior observation is not null"
256 )
257 self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True)
258 assert self._configs.index.equals(
259 self._contexts.index
260 ), "config and context must have the same index"
261 else:
262 assert context is None, (
263 "context of appending observation must be null "
264 "if context of prior observation is null"
265 )
266 if self._metadata is not None:
267 assert metadata is not None, (
268 "context of appending observation must not be null "
269 "if metadata of prior observation is not null"
270 )
271 self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True)
272 assert self._configs.index.equals(
273 self._metadata.index
274 ), "config and metadata must have the same index"
275 else:
276 assert metadata is None, (
277 "context of appending observation must be null "
278 "if metadata of prior observation is null"
279 )
281 def __len__(self) -> int:
282 return len(self._configs.index)
284 def __iter__(self) -> Iterator["Observation"]:
285 for idx in self._configs.index:
286 config = self._configs.loc[idx]
287 assert isinstance(config, pd.Series)
288 score = self._scores.loc[idx]
289 assert isinstance(score, pd.Series)
290 context = self._contexts.loc[idx] if self._contexts is not None else None
291 assert isinstance(context, (pd.Series, type(None)))
292 metadata = self._metadata.loc[idx] if self._metadata is not None else None
293 assert isinstance(metadata, (pd.Series, type(None)))
294 yield Observation(
295 config=config,
296 score=score,
297 context=context,
298 metadata=metadata,
299 )
301 def __repr__(self) -> str:
302 return (
303 f"Observation(configs={self._configs}, score={self._scores}, "
304 "contexts={self._contexts}, metadata={self._metadata})"
305 )
307 def __eq__(self, other: Any) -> bool:
308 if not isinstance(other, Observations):
309 return False
311 if not self._configs.equals(other._configs):
312 return False
313 if not self._scores.equals(other._scores):
314 return False
315 if not compare_optional_dataframe(self._contexts, other._contexts):
316 return False
317 if not compare_optional_dataframe(self._metadata, other._metadata):
318 return False
320 return True
322 # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq
323 def __ne__(self, other: Any) -> bool:
324 return not self.__eq__(other)
327class Suggestion:
328 """
329 A single suggestion for a configuration.
331 A Suggestion is an Observation that has not yet been scored. Evaluating the
332 Suggestion and calling `complete(scores)` can convert it to an Observation.
333 """
335 def __init__(
336 self,
337 *,
338 config: pd.Series,
339 context: pd.Series | None = None,
340 metadata: pd.Series | None = None,
341 ):
342 """
343 Creates a new Suggestion.
345 Parameters
346 ----------
347 config : pandas.Series
348 The configuration suggested.
349 context : pandas.Series | None
350 The context for this suggestion, by default None
351 metadata : pandas.Series | None
352 Any metadata provided by the underlying optimizer, by default None
353 """
354 self._config = config
355 self._context = context
356 self._metadata = metadata
358 @property
359 def config(self) -> pd.Series:
360 """Gets (a copy of) the config of the Suggestion."""
361 return self._config.copy()
363 @property
364 def context(self) -> pd.Series | None:
365 """Gets (a copy of) the context of the Suggestion."""
366 return self._context.copy() if self._context is not None else None
368 @property
369 def metadata(self) -> pd.Series | None:
370 """Gets (a copy of) the metadata of the Suggestion."""
371 return self._metadata.copy() if self._metadata is not None else None
373 def complete(self, score: pd.Series) -> Observation:
374 """
375 Completes the Suggestion by adding a score to turn it into an Observation.
377 Parameters
378 ----------
379 score : pandas.Series
380 The score metrics observed.
382 Returns
383 -------
384 Observation
385 The observation of the suggestion.
386 """
387 return Observation(
388 config=self.config,
389 score=score,
390 context=self.context,
391 metadata=self.metadata,
392 )
394 def to_configspace_config(self, space: ConfigurationSpace) -> Configuration:
395 """
396 Convert a Configuration Space to a Configuration.
398 Parameters
399 ----------
400 space : ConfigSpace.ConfigurationSpace
401 The ConfigurationSpace to be converted.
403 Returns
404 -------
405 ConfigSpace.Configuration
406 The output Configuration.
407 """
408 return Configuration(space, values=self._config.dropna().to_dict())
410 def __repr__(self) -> str:
411 return (
412 f"Suggestion(config={self._config}, context={self._context}, "
413 "metadata={self._metadata})"
414 )
416 def __eq__(self, other: Any) -> bool:
417 if not isinstance(other, Suggestion):
418 return False
420 if not self._config.equals(other._config):
421 return False
422 if not compare_optional_series(self._context, other._context):
423 return False
424 if not compare_optional_series(self._metadata, other._metadata):
425 return False
427 return True
429 def __ne__(self, other: Any) -> bool:
430 return not self.__eq__(other)