Coverage for mlos_bench/mlos_bench/storage/util.py: 89%
18 statements
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
« prev ^ index » next coverage.py v7.6.7, created at 2024-11-22 01:18 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""Utility functions for the storage subsystem."""
7from typing import Dict, Optional
9import pandas
11from mlos_bench.tunables.tunable import TunableValue, TunableValueTypeTuple
12from mlos_bench.util import try_parse_val
def kv_df_to_dict(dataframe: pandas.DataFrame) -> Dict[str, Optional[TunableValue]]:
    """
    Convert a flat key-value dataframe, as used by the mlos_bench.storage modules,
    into a plain dict.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        A dataframe with exactly two columns, in this order: 'parameter' (or
        'metric') and 'value'. The key column must hold strings; each value must
        be an instance of one of the types in ``TunableValueTypeTuple``.
        The caller's dataframe is not modified.

    Returns
    -------
    Dict[str, Optional[TunableValue]]
        One entry per row, keyed by the 'parameter' (or 'metric') column.
        String values are passed through ``try_parse_val``; all other values are
        stored unchanged.

    Raises
    ------
    AssertionError
        If the columns are not ['parameter', 'value'] (or ['metric', 'value']),
        or if a key is not a string.
    TypeError
        If a value is not an instance of ``TunableValueTypeTuple``.
    ValueError
        If the same key occurs in more than one row.
    """
    column_names = dataframe.columns.tolist()
    if column_names == ["metric", "value"]:
        # Work on a renamed copy so that the caller's dataframe stays untouched.
        dataframe = dataframe.rename(columns={"metric": "parameter"})
        column_names = dataframe.columns.tolist()
    assert column_names == ["parameter", "value"]

    result = {}
    # Cast to object dtype before iterating the rows.
    for _, record in dataframe.astype("O").iterrows():
        raw_value = record["value"]
        if not isinstance(raw_value, TunableValueTypeTuple):
            raise TypeError(f"Invalid column type: {type(raw_value)} value: {raw_value}")

        key = record["parameter"]
        assert isinstance(key, str)
        if key in result:
            raise ValueError(f"Duplicate parameter '{key}' in dataframe")

        if isinstance(raw_value, str):
            result[key] = try_parse_val(raw_value)
        else:
            result[key] = raw_value
    return result