Coverage for mlos_viz/mlos_viz/util.py: 78%
23 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-20 00:44 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-20 00:44 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Utility functions for manipulating experiment results data."""
6from typing import Dict, Literal, Optional, Tuple
8import pandas
10from mlos_bench.storage.base_experiment_data import ExperimentData
def expand_results_data_args(
    exp_data: Optional[ExperimentData] = None,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> Tuple[pandas.DataFrame, Dict[str, bool]]:
    """
    Resolve the results dataframe and objective sort columns from common arguments.

    Used by mlos_viz as well.

    Parameters
    ----------
    exp_data : Optional[ExperimentData]
        ExperimentData to fall back on for any argument that is not given.
    results_df : Optional[pandas.DataFrame]
        Results dataframe to use.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optimization targets mapped to their direction.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.

    Returns
    -------
    Tuple[pandas.DataFrame, Dict[str, bool]]
        A copy of the results dataframe, and a dict mapping each objective's
        column name in that dataframe to whether it sorts ascending
        (True for "min", False for "max").

    Raises
    ------
    ValueError
        If ``exp_data`` is None while ``results_df`` or ``objectives`` is also
        None, or if an objective direction is neither "min" nor "max".
    UserWarning
        If an objective matches no column of the results dataframe, either
        as given or with the result column prefix prepended.
    """
    missing_args_msg = "Must provide either exp_data or both results_df and objectives."

    # Fill in whichever inputs were omitted from the experiment data.
    if results_df is None:
        if exp_data is None:
            raise ValueError(missing_args_msg)
        results_df = exp_data.results_df

    if objectives is None:
        if exp_data is None:
            raise ValueError(missing_args_msg)
        objectives = exp_data.objectives

    # Map each objective onto its dataframe column and sort direction.
    prefix = ExperimentData.RESULT_COLUMN_PREFIX
    sort_ascending: Dict[str, bool] = {}
    for target, direction in objectives.items():
        if direction not in ("min", "max"):
            raise ValueError(
                f"Unexpected optimization direction for target {target}: {direction}"
            )
        # Accept the target either already prefixed or as a bare metric name.
        if target.startswith(prefix) and target in results_df.columns:
            column = target
        elif prefix + target in results_df.columns:
            column = prefix + target
        else:
            raise UserWarning(f"{target} is not a result column for experiment {exp_data}")
        sort_ascending[column] = direction == "min"

    # Note: returning copies is deliberate, to avoid issues with downstream consumers.
    # Copying the dataframe is more efficient than going back to the original data source.
    # TODO: It should be possible to later fix up the downstream consumers
    # (which are currently still internal to mlos-viz) to make their own data
    # sources if necessary. That will of course need tests.
    return (results_df.copy(), sort_ascending.copy())