Coverage for mlos_viz/mlos_viz/base.py: 90% (158 statements)
#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""Base functions for visualizing, explaining, and gaining insights from results."""

import re
import warnings
from collections.abc import Callable, Iterable
from importlib.metadata import version
from typing import Any, Literal

import pandas
import seaborn as sns
from matplotlib import pyplot as plt
from packaging.version import Version
from pandas.api.types import is_numeric_dtype
from pandas.core.groupby.generic import SeriesGroupBy

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz.util import expand_results_data_args

_SEABORN_VERS = Version(version("seaborn"))


def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> dict[str, Any]:
    """
    Assembles a smaller kwargs dict for the specified target function.

    Note: this only works with non-positional kwargs (e.g., those after a * arg).
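
    Examples
    --------
    A minimal sketch with a hypothetical function (``fn``, ``alpha``, ``beta``, and
    ``gamma`` are illustrative placeholders):

    >>> def fn(*, alpha: int = 1, beta: int = 2) -> None: ...
    >>> # Only keyword args that fn declares are kept; others are dropped.
    >>> _get_kwarg_defaults(fn, alpha=10, gamma=30)
    {'alpha': 10}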
31 """
32 target_kwargs = {}
33 for kword in target.__kwdefaults__ or {}:
34 if kword in kwargs:
35 target_kwargs[kword] = kwargs[kword]
36 return target_kwargs


def ignore_plotter_warnings() -> None:
    """Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.
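
    Examples
    --------
    Typically called once up front (e.g., at the top of a notebook) before plotting:

    >>> ignore_plotter_warnings()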
42 """
43 warnings.filterwarnings("ignore", category=FutureWarning)
44 if _SEABORN_VERS <= Version("0.13.1"):
45 warnings.filterwarnings(
46 "ignore",
47 category=DeprecationWarning,
48 module="seaborn", # but actually comes from pandas
49 message="is_categorical_dtype is deprecated and will be removed in a future version.",
50 )
51 # See Also: https://github.com/mwaskom/seaborn/issues/3804
52 warnings.filterwarnings(
53 "ignore",
54 category=PendingDeprecationWarning,
55 module="seaborn", # but actually comes from matplotlib
56 message=(
57 "vert: bool will be deprecated in a future version. "
58 "Use orientation: {'vertical', 'horizontal'} instead."
59 ),
60 )
62 warnings.filterwarnings(
63 "ignore",
64 module="matplotlib",
65 category=DeprecationWarning,
66 message="'mode' parameter is deprecated and will be removed in Pillow 13",
67 )


def _add_groupby_desc_column(
    results_df: pandas.DataFrame,
    groupby_columns: list[str] | None = None,
) -> tuple[pandas.DataFrame, list[str], str]:
    """
    Adds a group descriptor column to the results_df.

    Parameters
    ----------
    results_df : pandas.DataFrame
        The results dataframe to add the descriptor column to.
    groupby_columns : list[str] | None
        The columns to concatenate into the descriptor, by default
        ["tunable_config_trial_group_id", "tunable_config_id"].
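
    Examples
    --------
    A minimal sketch with a hand-built dataframe (column names "a" and "b" are
    illustrative placeholders, not the defaults):

    >>> df = pandas.DataFrame({"a": [1, 2], "b": [3, 4]})
    >>> (df, groupby_columns, groupby_column) = _add_groupby_desc_column(df, ["a", "b"])
    >>> groupby_column
    'a,b'
    >>> df[groupby_column].tolist()
    ['1,3', '2,4']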
82 """
83 # Compose a new groupby_column for display purposes that is the
84 # concatenation of the min trial_id (the first one) of each config trial
85 # group and the config_id.
86 # Note: It's need to be a string (e.g., categorical) for boxplot and lineplot to
87 # be on the same axis anyways.
88 if groupby_columns is None:
89 groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
90 groupby_column = ",".join(groupby_columns)
91 results_df[groupby_column] = (
92 results_df[groupby_columns].astype(str).apply(",".join, axis=1)
93 ) # pylint: disable=unnecessary-lambda
94 groupby_columns.append(groupby_column)
95 return (results_df, groupby_columns, groupby_column)


def augment_results_df_with_config_trial_group_stats(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    requested_result_cols: Iterable[str] | None = None,
) -> pandas.DataFrame:
    # pylint: disable=too-complex
105 """
106 Add a number of useful statistical measure columns to the results dataframe.
108 In particular, for each numeric result, we add the following columns for each
109 requested result column:
111 - ".p50": the median of each config trial group results
113 - ".p75": the p75 of each config trial group results
115 - ".p90": the p90 of each config trial group results
117 - ".p95": the p95 of each config trial group results
119 - ".p99": the p95 of each config trial group results
121 - ".mean": the mean of each config trial group results
123 - ".stddev": the mean of each config trial group results
125 - ".var": the variance of each config trial group results
127 - ".var_zscore": the zscore of this group (i.e., variance relative to the stddev
128 of all group variances). This can be useful for filtering out outliers (e.g.,
129 configs with high variance relative to others by restricting to abs < 2 to
130 remove those two standard deviations from the mean variance across all config
131 trial groups).
133 Additionally, we add a "tunable_config_trial_group_size" column that indicates
134 the number of trials using a particular config.

    Parameters
    ----------
    exp_data : ExperimentData | None
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : pandas.DataFrame | None
        The results dataframe to augment, by default None to use the results_df property.
    requested_result_cols : Iterable[str] | None
        Which results columns to augment, by default None to use all results columns
        that look numeric.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.
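
    Examples
    --------
    A minimal sketch using a tiny hand-built results dataframe (real usage would
    pass ``exp_data`` or ``exp_data.results_df`` instead):

    >>> results_df = pandas.DataFrame({
    ...     "tunable_config_id": [1, 1, 2, 2],
    ...     "trial_id": [1, 2, 3, 4],
    ...     ExperimentData.RESULT_COLUMN_PREFIX + "score": [1.0, 2.0, 3.0, 5.0],
    ... })
    >>> augmented_df = augment_results_df_with_config_trial_group_stats(results_df=results_df)
    >>> result_col = ExperimentData.RESULT_COLUMN_PREFIX + "score"
    >>> # Per-row group means: config 1 trials average 1.5, config 2 trials average 4.0.
    >>> augmented_df[result_col + ".mean"].tolist()
    [1.5, 1.5, 4.0, 4.0]
    >>> augmented_df["tunable_config_trial_group_size"].tolist()
    [2, 2, 2, 2]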
150 """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    if requested_result_cols is None:
        result_cols = {
            col
            for col in results_df.columns
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX)
        }
    else:
        result_cols = {
            col
            for col in requested_result_cols
            if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX) and col in results_df.columns
        }
        # Also accept unprefixed result column names by adding the prefix for them.
        result_cols.update(
            {
                ExperimentData.RESULT_COLUMN_PREFIX + col
                for col in requested_result_cols
                if ExperimentData.RESULT_COLUMN_PREFIX + col in results_df.columns
            }
        )

    def compute_zscore_for_group_agg(
        results_groups_perf: "SeriesGroupBy",
        stats_df: pandas.DataFrame,
        result_col: str,
        agg: Literal["mean"] | Literal["var"] | Literal["std"],
    ) -> None:
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # Compute the zscore of the chosen aggregate performance of each group into
        # each row in the dataframe.
        stats_df[result_col + f".{agg}_mean"] = results_groups_perf_aggs.mean()
        stats_df[result_col + f".{agg}_stddev"] = results_groups_perf_aggs.std()
        stats_df[result_col + f".{agg}_zscore"] = (
            stats_df[result_col + f".{agg}"] - stats_df[result_col + f".{agg}_mean"]
        ) / stats_df[result_col + f".{agg}_stddev"]
        stats_df.drop(
            columns=[result_col + ".var_" + agg for agg in ("mean", "stddev")], inplace=True
        )

    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform(
        "count"
    )
    for result_col in result_cols:
        if not result_col.startswith(ExperimentData.RESULT_COLUMN_PREFIX):
            continue
        if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
            # Avoid computing variance on things that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"].apply(lambda x: x**0.5)

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = f"{result_col}.p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
    return augmented_results_df


def limit_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    top_n_configs: int = 10,
    method: Literal["mean", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> tuple[pandas.DataFrame, list[int], dict[str, bool]]:
    # pylint: disable=too-many-locals
235 """
236 Utility function to process the results and determine the best performing configs
237 including potential repeats to help assess variability.
239 Parameters
240 ----------
241 exp_data : ExperimentData | None
242 The ExperimentData (e.g., obtained from the storage layer) to operate on.
243 results_df : pandas.DataFrame | None
244 The results dataframe to augment, by default None to use
245 :py:attr:`.ExperimentData.results_df` property.
246 objectives : Iterable[str]
247 Which result column(s) to use for sorting the configs, and in which
248 direction ("min" or "max").
249 By default None to automatically select the :py:attr:`.ExperimentData.objectives`.
250 top_n_configs : int
251 How many configs to return, including the default, by default 10.
252 method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
253 Which statistical method to use when sorting the config groups before
254 determining the cutoff, by default "mean".
256 Returns
257 -------
258 (top_n_config_results_df, top_n_config_ids, orderby_cols) :
259 tuple[pandas.DataFrame, list[int], dict[str, bool]]
260 The filtered results dataframe, the config ids, and the columns used to
261 order the configs.
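
    Examples
    --------
    An illustrative sketch (assumes ``exp_data`` was loaded from the storage layer):

    >>> (top_n_config_results_df, top_n_config_ids, orderby_cols) = limit_top_n_configs(
    ...     exp_data=exp_data,
    ...     top_n_configs=5,
    ...     method="p90",
    ... )  # doctest: +SKIP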
262 """
263 # Do some input checking first.
264 if method not in ["mean", "median", "p50", "p75", "p90", "p95", "p99"]:
265 raise ValueError(f"Invalid method: {method}")

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(
        exp_data,
        results_df=results_df,
        objectives=objectives,
    )
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose its mind for some reason and keeps forgetting that
    # results_df is not None and is in fact a DataFrame, so we periodically assert
    # it in this func for now.
    assert results_df is not None
    orderby_cols: dict[str, bool] = {
        obj_col + f".{method}": ascending for (obj_col, ascending) in objs_cols.items()
    }

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    default_config_id = (
        results_df[trial_id_col].min() if exp_data is None else exp_data.default_tunable_config_id
    )
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large,
    # but also make sure the default config is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        if method == "mean":
            singletons_mask = results_df["tunable_config_trial_group_size"] == 1
        else:
            singletons_mask = results_df["tunable_config_trial_group_size"] > 1
        results_df = results_df.loc[
            (
                (results_df[f"{obj_col}.var_zscore"].abs() < 2)
                | (singletons_mask)
                | (results_df[config_id_col] == default_config_id)
            )
        ]
    assert results_df is not None

    # Also, filter out results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for orderby_col, ascending in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[
        orderby_cols.keys()
    ]
    top_n_config_ids: list[int] = (
        group_results_df.sort_values(
            by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())
        )
        .head(top_n_configs)
        .index.tolist()
    )

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results.
    # Sort by the group ids.
    top_n_config_results_df = results_df.loc[
        (results_df[config_id_col].isin(top_n_config_ids))
    ].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat(
        [default_config_results_df, top_n_config_results_df],
        axis=0,
    )
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)


def plot_optimizer_trends(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
) -> None:
364 """
365 Plots the optimizer trends for the Experiment.
367 Parameters
368 ----------
369 exp_data : ExperimentData
370 The ExperimentData (e.g., obtained from the storage layer) to plot.
371 results_df : pandas.DataFrame | None
372 Optional results_df to plot.
373 If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
374 objectives : Optional[dict[str, Literal["min", "max"]]]
375 Optional objectives to plot.
376 If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
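
    Examples
    --------
    An illustrative sketch (assumes ``exp_data`` was previously loaded from the
    storage layer, e.g., in a notebook):

    >>> ignore_plotter_warnings()  # doctest: +SKIP
    >>> plot_optimizer_trends(exp_data)  # doctest: +SKIP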
377 """
378 (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
379 (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)
381 for objective_column, ascending in obj_cols.items():
382 incumbent_column = objective_column + ".incumbent"
384 # Determine the mean of each config trial group to match the box plots.
385 group_results_df = (
386 results_df.groupby(groupby_columns)[objective_column]
387 .mean()
388 .reset_index()
389 .sort_values(groupby_columns)
390 )
391 #
392 # Note: technically the optimizer (usually) uses the *first* result for a
393 # given config trial group before moving on to a new config (x-axis), so
394 # plotting the mean may be slightly misleading when trying to understand the
395 # actual path taken by the optimizer in case of high variance samples.
396 # Here's a way to do that, though it can also be misleading if the optimizer
397 # later gets a worse value for that config group as well.
398 #
399 # group_results_df = results_df.sort_values(groupby_columns + ["trial_id"]).groupby(
400 # groupby_columns).head(1)[groupby_columns + [objective_column]].reset_index()
402 # Calculate the incumbent (best seen so far)
403 if ascending:
404 group_results_df[incumbent_column] = group_results_df[objective_column].cummin()
405 else:
406 group_results_df[incumbent_column] = group_results_df[objective_column].cummax()
408 (_fig, axis) = plt.subplots(figsize=(15, 5))
410 # Result of each set of trials for a config
411 sns.boxplot(
412 data=results_df,
413 x=groupby_column,
414 y=objective_column,
415 ax=axis,
416 )
418 # Results of the best so far.
419 axis = sns.lineplot(
420 data=group_results_df,
421 x=groupby_column,
422 y=incumbent_column,
423 alpha=0.7,
424 label="Mean of Incumbent Config Trial Group",
425 ax=axis,
426 )
428 plt.yscale("log")
429 plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))
431 plt.xlabel("Config Trial Group ID, Config ID")
432 plt.xticks(rotation=90, fontsize=8)
434 plt.title(
435 "Optimizer Trends for Experiment: " + exp_data.experiment_id
436 if exp_data is not None
437 else ""
438 )
439 plt.grid()
440 plt.show()


def plot_top_n_configs(
    exp_data: ExperimentData | None = None,
    *,
    results_df: pandas.DataFrame | None = None,
    objectives: dict[str, Literal["min", "max"]] | None = None,
    with_scatter_plot: bool = False,
    **kwargs: Any,
) -> None:
    # pylint: disable=too-many-locals
452 """
453 Plots the top-N configs along with the default config for the given
454 :py:class:`.ExperimentData`.
456 Intended to be used from a Jupyter notebook.
458 Parameters
459 ----------
460 exp_data: ExperimentData
461 The experiment data to plot.
462 results_df : pandas.DataFrame | None
463 Optional results_df to plot.
464 If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
465 objectives : Optional[dict[str, Literal["min", "max"]]]
466 Optional objectives to plot.
467 If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
468 with_scatter_plot : bool
469 Whether to also add scatter plot to the output figure.
470 kwargs : dict
471 Remaining keyword arguments are passed along to the
472 :py:func:`limit_top_n_configs` function.
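
    Examples
    --------
    An illustrative sketch (assumes ``exp_data`` was loaded from the storage layer;
    extra kwargs such as ``method`` are forwarded to :py:func:`limit_top_n_configs`):

    >>> plot_top_n_configs(exp_data, with_scatter_plot=True, method="p90")  # doctest: +SKIP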
473 """
474 (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
475 top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
476 if "results_df" not in top_n_config_args:
477 top_n_config_args["results_df"] = results_df
478 if "objectives" not in top_n_config_args:
479 top_n_config_args["objectives"] = objectives
480 (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(
481 exp_data=exp_data,
482 **top_n_config_args,
483 )
485 (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(
486 top_n_config_results_df,
487 )
488 top_n = len(top_n_config_results_df[groupby_column].unique()) - 1
490 for orderby_col, ascending in orderby_cols.items():
491 opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
492 (_fig, axis) = plt.subplots()
493 sns.violinplot(
494 data=top_n_config_results_df,
495 x=groupby_column,
496 y=orderby_col,
497 ax=axis,
498 )
499 if with_scatter_plot:
500 sns.scatterplot(
501 data=top_n_config_results_df,
502 x=groupby_column,
503 y=orderby_col,
504 legend=False,
505 ax=axis,
506 )
507 plt.grid()
508 (xticks, xlabels) = plt.xticks()
509 # default should be in the first position based on top_n_configs() return
510 xlabels[0] = "default" # type: ignore[call-overload]
511 plt.xticks(xticks, xlabels) # type: ignore[arg-type]
512 plt.xlabel("Config Trial Group, Config ID")
513 plt.xticks(rotation=90)
514 plt.ylabel(opt_tgt)
515 plt.yscale("log")
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()