Coverage for mlos_bench/mlos_bench/storage/util.py: 89%
19 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-06 00:35 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-06 00:35 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Utility functions for the storage subsystem.
7"""
9from typing import Dict, Optional
11import pandas
13from mlos_bench.tunables.tunable import TunableValue, TunableValueTypeTuple
14from mlos_bench.util import try_parse_val
def kv_df_to_dict(dataframe: pandas.DataFrame) -> Dict[str, Optional[TunableValue]]:
    """
    Turn a flat key-value dataframe, in one of the layouts used by the
    mlos_bench.storage modules, into a plain dict.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        A dataframe whose columns are exactly ['parameter', 'value'] or
        ['metric', 'value'].  The key column must hold strings and the
        'value' column must hold instances of the types listed in
        TunableValueTypeTuple.  The caller's dataframe is not modified.

    Returns
    -------
    Dict[str, Optional[TunableValue]]
        A mapping from each key to its value.  String values are passed
        through try_parse_val; all other values are stored as-is.

    Raises
    ------
    TypeError
        If a value is not an instance of TunableValueTypeTuple.
    ValueError
        If the same key appears in more than one row.
    AssertionError
        If the columns are not as described above or a key is not a string.
    """
    # Normalize the 'metric' layout to the 'parameter' layout.  The non-inplace
    # rename returns a new frame, so the caller's dataframe stays untouched.
    if dataframe.columns.tolist() == ['metric', 'value']:
        dataframe = dataframe.rename(columns={'metric': 'parameter'})
    assert dataframe.columns.tolist() == ['parameter', 'value']

    result = {}
    # Cast to object dtype before iterating so that each row is read from an
    # object-typed frame rather than from pandas' typed columns.
    for _, record in dataframe.astype('O').iterrows():
        key = record['parameter']
        value = record['value']
        if not isinstance(value, TunableValueTypeTuple):
            raise TypeError(f"Invalid column type: {type(value)} value: {value}")
        assert isinstance(key, str)
        if key in result:
            raise ValueError(f"Duplicate parameter '{key}' in dataframe")
        # Only string values are handed to try_parse_val; everything else is kept verbatim.
        if isinstance(value, str):
            result[key] = try_parse_val(value)
        else:
            result[key] = value
    return result