Coverage for mlos_bench/mlos_bench/storage/util.py: 89%

19 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-06 00:35 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Utility functions for the storage subsystem. 

7""" 

8 

9from typing import Dict, Optional 

10 

11import pandas 

12 

13from mlos_bench.tunables.tunable import TunableValue, TunableValueTypeTuple 

14from mlos_bench.util import try_parse_val 

15 

16 

def kv_df_to_dict(dataframe: pandas.DataFrame) -> Dict[str, Optional[TunableValue]]:
    """
    Flatten a two-column key/value dataframe into a plain dictionary.

    Used for the flat key-value dataframe layouts produced by the
    mlos_bench.storage modules.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        A dataframe whose columns are exactly ['parameter', 'value'] or
        ['metric', 'value'] (in that order). Each key must be a string and
        each value must be an instance of one of the TunableValueTypeTuple types.
        The input dataframe is not modified.

    Returns
    -------
    Dict[str, Optional[TunableValue]]
        A mapping of each key to its value. String values are passed through
        `try_parse_val` before being stored; all other values are stored as is.

    Raises
    ------
    AssertionError
        If the columns are not as described above, or if a key is not a string.
    TypeError
        If a value is not an instance of TunableValueTypeTuple.
    ValueError
        If the same key appears in more than one row.
    """
    if dataframe.columns.tolist() == ['metric', 'value']:
        # rename() hands back a new frame, so the caller's dataframe is left untouched.
        dataframe = dataframe.rename(columns={'metric': 'parameter'})
    assert dataframe.columns.tolist() == ['parameter', 'value']

    result: Dict[str, Optional[TunableValue]] = {}
    # Cast to object dtype before iterating so that each row carries
    # the individual cell values rather than a common upcast dtype.
    for _, record in dataframe.astype('O').iterrows():
        val = record['value']
        if not isinstance(val, TunableValueTypeTuple):
            raise TypeError(f"Invalid column type: {type(val)} value: {val}")
        key = record['parameter']
        assert isinstance(key, str)
        if key in result:
            raise ValueError(f"Duplicate parameter '{key}' in dataframe")
        if isinstance(val, str):
            val = try_parse_val(val)
        result[key] = val
    return result