Coverage for mlos_viz/mlos_viz/base.py: 90%
157 statements
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base functions for visualizing, explaining, and gaining insights from results."""

import re
import warnings
from collections.abc import Callable, Iterable
from importlib.metadata import version
from typing import Any, Literal

import pandas
import seaborn as sns
from matplotlib import pyplot as plt
from packaging.version import Version
from pandas.api.types import is_numeric_dtype
from pandas.core.groupby.generic import SeriesGroupBy

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz.util import expand_results_data_args

_SEABORN_VERS = Version(version("seaborn"))


def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> dict[str, Any]:
    """
    Assembles a smaller kwargs dict for the specified target function.

    Note: this only works with non-positional kwargs (e.g., those after a * arg).
    """
    target_kwargs = {}
    for kword in target.__kwdefaults__:  # or {}; intentionally omitted for now
        if kword in kwargs:
            target_kwargs[kword] = kwargs[kword]
    return target_kwargs
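

# Example (a minimal sketch with a hypothetical target function): only the
# keyword-only arguments of the target that also appear in **kwargs are kept;
# everything else is dropped.
#
#     def _demo(*, width: float = 0.5, color: str = "b") -> None:
#         ...
#
#     _get_kwarg_defaults(_demo, width=0.8, unrelated=1)  # -> {"width": 0.8}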


def ignore_plotter_warnings() -> None:
    """Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)
    if _SEABORN_VERS <= Version("0.13.1"):
        warnings.filterwarnings(
            "ignore",
            category=DeprecationWarning,
            module="seaborn",  # but actually comes from pandas
            message="is_categorical_dtype is deprecated and will be removed in a future version.",
        )
    # See Also: https://github.com/mwaskom/seaborn/issues/3804
    warnings.filterwarnings(
        "ignore",
        category=PendingDeprecationWarning,
        module="seaborn",  # but actually comes from matplotlib
        message=(
            "vert: bool will be deprecated in a future version. "
            "Use orientation: {'vertical', 'horizontal'} instead."
        ),
    )
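

# Typical usage (a sketch): call this once near the top of a notebook, before
# any plotting, so the warning filters are in place for subsequent plot calls.
#
#     from mlos_viz.base import ignore_plotter_warnings
#
#     ignore_plotter_warnings()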


def _add_groupby_desc_column(
    results_df: pandas.DataFrame,
    groupby_columns: list[str] | None = None,
) -> tuple[pandas.DataFrame, list[str], str]:
    """
    Adds a group descriptor column to the results_df.

    Parameters
    ----------
    results_df : pandas.DataFrame
        The results dataframe to add the descriptor column to.
    groupby_columns : list[str] | None
        The columns to concatenate into the descriptor, by default
        ["tunable_config_trial_group_id", "tunable_config_id"].

    Returns
    -------
    tuple[pandas.DataFrame, list[str], str]
        The results dataframe, the groupby_columns list with the new descriptor
        column appended, and the name of that new descriptor column.
    """
    # Compose a new groupby_column for display purposes that is the
    # concatenation of the min trial_id (the first one) of each config trial
    # group and the config_id.
    # Note: it needs to be a string (e.g., categorical) for boxplot and lineplot
    # to share the same axis anyways.
    if groupby_columns is None:
        groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
    groupby_column = ",".join(groupby_columns)
    results_df[groupby_column] = (
        results_df[groupby_columns].astype(str).apply(",".join, axis=1)
    )
    groupby_columns.append(groupby_column)
    return (results_df, groupby_columns, groupby_column)
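

# Example (hypothetical data): with the default groupby_columns, a row with
# tunable_config_trial_group_id=2 and tunable_config_id=5 gets the string
# descriptor "2,5" in a new column named
# "tunable_config_trial_group_id,tunable_config_id", which is also appended to
# the returned groupby_columns list.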


def augment_results_df_with_config_trial_group_stats(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    requested_result_cols: Iterable[str] | None = None,
) -> pandas.DataFrame:
    # pylint: disable=too-complex
    """
    Add a number of useful statistical measure columns to the results dataframe.

    In particular, for each numeric result, we add the following columns for each
    requested result column:

    - ".p50": the median of each config trial group results

    - ".p75": the p75 of each config trial group results

    - ".p90": the p90 of each config trial group results

    - ".p95": the p95 of each config trial group results

    - ".p99": the p99 of each config trial group results

    - ".mean": the mean of each config trial group results

    - ".stddev": the standard deviation of each config trial group results

    - ".var": the variance of each config trial group results

    - ".var_zscore": the zscore of this group's variance relative to the stddev
      of all group variances. This can be useful for filtering out outliers
      (e.g., configs with high variance relative to others) by restricting to
      abs < 2, removing groups more than two standard deviations from the mean
      variance across all config trial groups.

    Additionally, we add a "tunable_config_trial_group_size" column that indicates
    the number of trials using a particular config.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : pandas.DataFrame | None
        The results dataframe to augment, by default None to use the results_df property.
    requested_result_cols : Iterable[str] | None
        Which results columns to augment, by default None to use all results columns
        that look numeric.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.
    """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    if requested_result_cols is None:
        result_cols = {
            col
            for col in results_df.columns
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX)
        }
    else:
        result_cols = {
            col
            for col in requested_result_cols
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX) and col in results_df.columns
        }
        result_cols.update(
            {
                ExperimentData.RESULT_COLUMN_PREFIX + col
                for col in requested_result_cols
                if ExperimentData.RESULT_COLUMN_PREFIX + col in results_df.columns
            }
        )

    def compute_zscore_for_group_agg(
        results_groups_perf: "SeriesGroupBy",
        stats_df: pandas.DataFrame,
        result_col: str,
        agg: Literal["mean"] | Literal["var"] | Literal["std"],
    ) -> None:
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # Compute the zscore of the chosen aggregate performance of each group into
        # each row in the dataframe.
        stats_df[result_col + f".{agg}_mean"] = results_groups_perf_aggs.mean()
        stats_df[result_col + f".{agg}_stddev"] = results_groups_perf_aggs.std()
        stats_df[result_col + f".{agg}_zscore"] = (
            stats_df[result_col + f".{agg}"] - stats_df[result_col + f".{agg}_mean"]
        ) / stats_df[result_col + f".{agg}_stddev"]
        # Drop the intermediate columns now that the zscore is computed.
        stats_df.drop(
            columns=[result_col + f".{agg}_{stat}" for stat in ("mean", "stddev")],
            inplace=True,
        )

    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform(
        "count"
    )
    for result_col in result_cols:
        if not result_col.startswith(ExperimentData.RESULT_COLUMN_PREFIX):
            continue
        if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
            # Skip computing variance on things that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"].apply(lambda x: x**0.5)

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = f"{result_col}.p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
    return augmented_results_df
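

# Example usage (a sketch, assuming exp_data was loaded from the storage layer
# and its results include a numeric "result.score" column; both names are
# hypothetical):
#
#     stats_df = augment_results_df_with_config_trial_group_stats(exp_data=exp_data)
#     stats_df[["tunable_config_id", "result.score.p50", "result.score.var_zscore"]]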


def limit_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    top_n_configs: int = 10,
    method: Literal["mean", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> tuple[pandas.DataFrame, list[int], dict[str, bool]]:
    # pylint: disable=too-many-locals
    """
    Utility function to process the results and determine the best performing
    configs, including potential repeats, to help assess variability.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to operate on.
    results_df : pandas.DataFrame | None
        The results dataframe to augment, by default None to use
        :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Which result column(s) to use for sorting the configs, and in which
        direction ("min" or "max").
        By default None to automatically select the :py:attr:`.ExperimentData.objectives`.
    top_n_configs : int
        How many configs to return, including the default, by default 10.
    method : Literal["mean", "p50", "p75", "p90", "p95", "p99"]
        Which statistical method to use when sorting the config groups before
        determining the cutoff, by default "mean".

    Returns
    -------
    (top_n_config_results_df, top_n_config_ids, orderby_cols) :
        tuple[pandas.DataFrame, list[int], dict[str, bool]]
        The filtered results dataframe, the config ids, and the columns used to
        order the configs.
    """
    # Do some input checking first.
    if method not in ("mean", "p50", "p75", "p90", "p95", "p99"):
        raise ValueError(f"Invalid method: {method}")

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(
        exp_data,
        results_df=results_df,
        objectives=objectives,
    )
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose its mind for some reason and keeps forgetting that
    # results_df is not None and is in fact a DataFrame, so we periodically assert
    # it in this func for now.
    assert results_df is not None
    orderby_cols: dict[str, bool] = {
        obj_col + f".{method}": ascending for (obj_col, ascending) in objs_cols.items()
    }

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    default_config_id = (
        results_df[trial_id_col].min() if exp_data is None else exp_data.default_tunable_config_id
    )
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large,
    # but also make sure the default config is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        if method == "mean":
            singletons_mask = results_df["tunable_config_trial_group_size"] == 1
        else:
            singletons_mask = results_df["tunable_config_trial_group_size"] > 1
        results_df = results_df.loc[
            (
                (results_df[f"{obj_col}.var_zscore"].abs() < 2)
                | (singletons_mask)
                | (results_df[config_id_col] == default_config_id)
            )
        ]
    assert results_df is not None

    # Also, filter out results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for orderby_col, ascending in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[
        orderby_cols.keys()
    ]
    top_n_config_ids: list[int] = (
        group_results_df.sort_values(
            by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())
        )
        .head(top_n_configs)
        .index.tolist()
    )

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results.
    # Sort by the group ids.
    top_n_config_results_df = results_df.loc[
        (results_df[config_id_col].isin(top_n_config_ids))
    ].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat(
        [default_config_results_df, top_n_config_results_df],
        axis=0,
    )
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)
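

# Example usage (a sketch, assuming exp_data declares a "score" objective to
# minimize; the objective and result column names are hypothetical):
#
#     (top_df, top_ids, orderby_cols) = limit_top_n_configs(
#         exp_data=exp_data,
#         objectives={"score": "min"},
#         top_n_configs=5,
#         method="p90",
#     )
#     # top_ids[0] is the default config id, and orderby_cols would map
#     # something like "result.score.p90" -> True (ascending) for sorting.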


def plot_optimizer_trends(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
) -> None:
    """
    Plots the optimizer trends for the Experiment.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : pandas.DataFrame | None
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    """
    (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

    for objective_column, ascending in obj_cols.items():
        incumbent_column = objective_column + ".incumbent"

        # Determine the mean of each config trial group to match the box plots.
        group_results_df = (
            results_df.groupby(groupby_columns)[objective_column]
            .mean()
            .reset_index()
            .sort_values(groupby_columns)
        )
        #
        # Note: technically the optimizer (usually) uses the *first* result for a
        # given config trial group before moving on to a new config (x-axis), so
        # plotting the mean may be slightly misleading when trying to understand the
        # actual path taken by the optimizer in case of high variance samples.
        # Here's a way to do that, though it can also be misleading if the optimizer
        # later gets a worse value for that config group as well.
        #
        # group_results_df = results_df.sort_values(groupby_columns + ["trial_id"]).groupby(
        #     groupby_columns).head(1)[groupby_columns + [objective_column]].reset_index()

        # Calculate the incumbent (best seen so far).
        if ascending:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummin()
        else:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummax()

        (_fig, axis) = plt.subplots(figsize=(15, 5))

        # Result of each set of trials for a config.
        sns.boxplot(
            data=results_df,
            x=groupby_column,
            y=objective_column,
            ax=axis,
        )

        # Results of the best seen so far.
        axis = sns.lineplot(
            data=group_results_df,
            x=groupby_column,
            y=incumbent_column,
            alpha=0.7,
            label="Mean of Incumbent Config Trial Group",
            ax=axis,
        )

        plt.yscale("log")
        plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

        plt.xlabel("Config Trial Group ID, Config ID")
        plt.xticks(rotation=90, fontsize=8)

        plt.title(
            "Optimizer Trends for Experiment: " + exp_data.experiment_id
            if exp_data is not None
            else ""
        )
        plt.grid()
        plt.show()
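

# Example usage (a sketch, assuming an ExperimentData instance obtained from
# the mlos_bench storage layer):
#
#     from mlos_viz.base import ignore_plotter_warnings, plot_optimizer_trends
#
#     ignore_plotter_warnings()
#     plot_optimizer_trends(exp_data)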


def plot_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    with_scatter_plot: bool = False,
    **kwargs: Any,
) -> None:
    # pylint: disable=too-many-locals
    """
    Plots the top-N configs along with the default config for the given
    :py:class:`.ExperimentData`.

    Intended to be used from a Jupyter notebook.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The experiment data to plot.
    results_df : pandas.DataFrame | None
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : dict[str, Literal["min", "max"]] | None
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    with_scatter_plot : bool
        Whether to also add a scatter plot to the output figure.
    kwargs : dict
        Remaining keyword arguments are passed along to the
        :py:func:`limit_top_n_configs` function.
    """
    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
    if "results_df" not in top_n_config_args:
        top_n_config_args["results_df"] = results_df
    if "objectives" not in top_n_config_args:
        top_n_config_args["objectives"] = objectives
    (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(
        exp_data=exp_data,
        **top_n_config_args,
    )

    (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(
        top_n_config_results_df,
    )
    top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

    for orderby_col, ascending in orderby_cols.items():
        opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
        (_fig, axis) = plt.subplots()
        sns.violinplot(
            data=top_n_config_results_df,
            x=groupby_column,
            y=orderby_col,
            ax=axis,
        )
        if with_scatter_plot:
            sns.scatterplot(
                data=top_n_config_results_df,
                x=groupby_column,
                y=orderby_col,
                legend=False,
                ax=axis,
            )
        plt.grid()
        (xticks, xlabels) = plt.xticks()
        # The default config should be in the first position based on the
        # limit_top_n_configs() return ordering.
        xlabels[0] = "default"  # type: ignore[call-overload]
        plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
        plt.xlabel("Config Trial Group, Config ID")
        plt.xticks(rotation=90)
        plt.ylabel(opt_tgt)
        plt.yscale("log")
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()
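

# Example usage (a sketch; note that extra kwargs such as top_n_configs are
# forwarded to limit_top_n_configs via _get_kwarg_defaults):
#
#     plot_top_n_configs(exp_data, with_scatter_plot=True, top_n_configs=5)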