Coverage for mlos_viz/mlos_viz/util.py: 79%

24 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-06 00:35 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Utility functions for manipulating experiment results data. 

7""" 

8from typing import Dict, Literal, Optional, Tuple 

9 

10import pandas 

11 

12from mlos_bench.storage.base_experiment_data import ExperimentData 

13 

14 

def expand_results_data_args(
    exp_data: Optional[ExperimentData] = None,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> Tuple[pandas.DataFrame, Dict[str, bool]]:
    """
    Resolve the results dataframe and the sort order of each objective column.

    Parameters
    ----------
    exp_data : Optional[ExperimentData], optional
        Fallback source; only consulted for whichever of ``results_df`` and
        ``objectives`` is not passed explicitly.
    results_df : Optional[pandas.DataFrame], optional
        Results dataframe to use.
        Defaults to exp_data.results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]], optional
        Mapping of optimization target to its direction.
        Defaults to exp_data.objectives property.

    Returns
    -------
    Tuple[pandas.DataFrame, Dict[str, bool]]
        A copy of the results dataframe, and a mapping from each objective's
        column name in that dataframe to whether it sorts in ascending order
        (True for "min", False for "max").

    Raises
    ------
    ValueError
        If ``results_df`` or ``objectives`` is missing while ``exp_data`` is
        None, or if an optimization direction is neither "min" nor "max".
    UserWarning
        If an objective matches no column of the results dataframe, either
        as given or with the result column prefix prepended.
    """
    missing_args_msg = "Must provide either exp_data or both results_df and objectives."

    # Fall back to the experiment data for anything not passed explicitly.
    if results_df is None:
        if exp_data is None:
            raise ValueError(missing_args_msg)
        results_df = exp_data.results_df

    if objectives is None:
        if exp_data is None:
            raise ValueError(missing_args_msg)
        objectives = exp_data.objectives

    # Map each objective onto its dataframe column and sort direction.
    sort_orders: Dict[str, bool] = {}
    for target, direction in objectives.items():
        if direction not in ("min", "max"):
            raise ValueError(f"Unexpected optimization direction for target {target}: {direction}")

        prefix = ExperimentData.RESULT_COLUMN_PREFIX
        # Accept the target either already prefixed or as a bare metric name.
        if target.startswith(prefix) and target in results_df.columns:
            column = target
        elif prefix + target in results_df.columns:
            column = prefix + target
        else:
            raise UserWarning(f"{target} is not a result column for experiment {exp_data}")
        sort_orders[column] = direction == "min"

    # Note: these copies are important to avoid issues with downstream consumers.
    # It is more efficient to copy the dataframe than to go back to the original data source.
    # TODO: However, it should be possible to later fixup the downstream consumers
    # (which are currently still internal to mlos-viz) to make their own data
    # sources if necessary. That will of course need tests.
    return (results_df.copy(), dict(sort_orders))