Coverage for mlos_viz/mlos_viz/base.py: 90%

155 statements  

« prev     ^ index     » next       coverage.py v7.6.9, created at 2024-12-20 00:44 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5"""Base functions for visualizing, explain, and gain insights from results.""" 

6 

7import re 

8import warnings 

9from importlib.metadata import version 

10from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Tuple, Union 

11 

12import pandas 

13import seaborn as sns 

14from matplotlib import pyplot as plt 

15from pandas.api.types import is_numeric_dtype 

16from pandas.core.groupby.generic import SeriesGroupBy 

17 

18from mlos_bench.storage.base_experiment_data import ExperimentData 

19from mlos_viz.util import expand_results_data_args 

20 

21_SEABORN_VERS = version("seaborn") 

22 

23 

def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> Dict[str, Any]:
    """
    Assemble a smaller kwargs dict for the specified target function.

    Only the entries of ``kwargs`` whose names appear in
    ``target.__kwdefaults__`` (i.e., keyword-only parameters of ``target`` that
    have a default value) are kept.

    Note: this only works with non-positional kwargs (e.g., those after a * arg).

    Parameters
    ----------
    target : Callable
        The function whose keyword-only defaults select which kwargs to keep.
    kwargs : Any
        Candidate keyword arguments to filter.

    Returns
    -------
    Dict[str, Any]
        The subset of ``kwargs`` that matches keyword-only defaults of
        ``target``. Empty when ``target`` declares none.
    """
    # ``__kwdefaults__`` is None for functions without keyword-only defaults and
    # is missing entirely on some callables (e.g., builtins), so fall back to an
    # empty mapping instead of failing while iterating.
    kwdefaults = getattr(target, "__kwdefaults__", None) or {}
    return {kword: kwargs[kword] for kword in kwdefaults if kword in kwargs}

35 

36 

def ignore_plotter_warnings() -> None:
    """
    Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.

    The following filters are installed:

    - all ``FutureWarning`` warnings,
    - for seaborn releases up to and including 0.13.1, the
      ``is_categorical_dtype`` ``DeprecationWarning`` attributed to seaborn,
    - the ``vert: bool`` ``PendingDeprecationWarning`` attributed to seaborn.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)

    # Compare release numbers numerically: a plain string comparison orders
    # versions lexicographically (e.g., "0.9.0" > "0.13.1"), which is wrong.
    # Only the leading dotted-number part is used so that suffixes such as
    # "rc1" or ".dev0" do not break the parsing.
    release_match = re.match(r"\d+(?:\.\d+)*", _SEABORN_VERS)
    seaborn_release = (
        tuple(int(part) for part in release_match.group().split(".")) if release_match else ()
    )
    if seaborn_release <= (0, 13, 1):
        warnings.filterwarnings(
            "ignore",
            category=DeprecationWarning,
            module="seaborn",  # but actually comes from pandas
            message="is_categorical_dtype is deprecated and will be removed in a future version.",
        )
    # See Also: https://github.com/mwaskom/seaborn/issues/3804
    warnings.filterwarnings(
        "ignore",
        category=PendingDeprecationWarning,
        module="seaborn",  # but actually comes from matplotlib
        message=(
            "vert: bool will be deprecated in a future version. "
            "Use orientation: {'vertical', 'horizontal'} instead."
        ),
    )

59 

60 

def _add_groupby_desc_column(
    results_df: pandas.DataFrame,
    groupby_columns: Optional[List[str]] = None,
) -> Tuple[pandas.DataFrame, List[str], str]:
    """
    Adds a group descriptor column to the results_df.

    The descriptor column is named after the comma-joined ``groupby_columns`` and
    holds, for each row, the comma-joined string values of those columns.

    Parameters
    ----------
    results_df: pandas.DataFrame
        The results dataframe to add the descriptor column to.
        Note: the column is added to this dataframe in place.
    groupby_columns: Optional[List[str]]
        The columns to combine into the descriptor, by default
        ``["tunable_config_trial_group_id", "tunable_config_id"]``.
        The caller's list is not modified.

    Returns
    -------
    (results_df, groupby_columns, groupby_column) :
        Tuple[pandas.DataFrame, List[str], str]
        The dataframe with the descriptor column added, a new list containing the
        groupby columns followed by the descriptor column name, and the
        descriptor column name itself.
    """
    # Compose a new groupby_column for display purposes that is the
    # concatenation of the min trial_id (the first one) of each config trial
    # group and the config_id.
    # Note: It needs to be a string (e.g., categorical) for boxplot and lineplot to
    # be on the same axis anyways.
    if groupby_columns is None:
        groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
    else:
        # Work on a copy so the descriptor name appended below does not leak
        # into the list owned by the caller.
        groupby_columns = list(groupby_columns)
    groupby_column = ",".join(groupby_columns)
    results_df[groupby_column] = results_df[groupby_columns].astype(str).apply(",".join, axis=1)
    groupby_columns.append(groupby_column)
    return (results_df, groupby_columns, groupby_column)

87 

88 

def augment_results_df_with_config_trial_group_stats(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    requested_result_cols: Optional[Iterable[str]] = None,
) -> pandas.DataFrame:
    # pylint: disable=too-complex
    """
    Add a number of useful statistical measure columns to the results dataframe.

    In particular, for each numeric result, we add the following columns for each
    requested result column:

    - ".p50": the median of each config trial group results

    - ".p75": the p75 of each config trial group results

    - ".p90": the p90 of each config trial group results

    - ".p95": the p95 of each config trial group results

    - ".p99": the p99 of each config trial group results

    - ".mean": the mean of each config trial group results

    - ".stddev": the standard deviation of each config trial group results

    - ".var": the variance of each config trial group results

    - ".var_zscore": the zscore of this group (i.e., variance relative to the stddev
      of all group variances). This can be useful for filtering out outliers (e.g.,
      configs with high variance relative to others by restricting to abs < 2 to
      remove those two standard deviations from the mean variance across all config
      trial groups).

    Additionally, we add a "tunable_config_trial_group_size" column that indicates
    the number of trials using a particular config.

    Result columns are skipped when their name looks like a start/end timestamp,
    when they are not numeric, or when they hold a single unique value.

    Note: the "tunable_config_trial_group_size" column is added to the input
    dataframe in place.

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use the results_df property.
    requested_result_cols : Optional[Iterable[str]]
        Which results columns to augment, by default None to use all results columns
        that look numeric.
        Names may be given with or without the result column prefix.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.

    Raises
    ------
    ValueError
        If neither ``exp_data`` nor ``results_df`` is provided, or if the results
        contain fewer than two distinct "tunable_config_id" groups.
    """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    prefix = ExperimentData.RESULT_COLUMN_PREFIX
    if requested_result_cols is None:
        result_cols = {col for col in results_df.columns if col.startswith(prefix)}
    else:
        # Materialize once: the names are walked twice below and the argument
        # may be a one-shot iterator.
        requested = list(requested_result_cols)
        # Names that already carry the prefix.
        result_cols = {
            col for col in requested if col.startswith(prefix) and col in results_df.columns
        }
        # Names given without the prefix: accept them when the prefixed form is
        # an actual column of the dataframe.
        result_cols.update(
            prefix + col for col in requested if prefix + col in results_df.columns
        )

    def compute_zscore_for_group_agg(
        results_groups_perf: "SeriesGroupBy",
        stats_df: pandas.DataFrame,
        result_col: str,
        agg: Union[Literal["mean"], Literal["var"], Literal["std"]],
    ) -> None:
        """Add the ``{result_col}.{agg}_zscore`` column to ``stats_df`` in place."""
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # Compute the zscore of the chosen aggregate performance of each group into
        # each row in the dataframe.
        agg_mean_col = f"{result_col}.{agg}_mean"
        agg_stddev_col = f"{result_col}.{agg}_stddev"
        stats_df[agg_mean_col] = results_groups_perf_aggs.mean()
        stats_df[agg_stddev_col] = results_groups_perf_aggs.std()
        stats_df[f"{result_col}.{agg}_zscore"] = (
            stats_df[f"{result_col}.{agg}"] - stats_df[agg_mean_col]
        ) / stats_df[agg_stddev_col]
        # Drop the intermediate columns for the aggregate that was actually
        # computed (not a hard-coded "var").
        stats_df.drop(columns=[agg_mean_col, agg_stddev_col], inplace=True)

    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform(
        "count"
    )
    # Sort so the order of the added columns does not depend on set iteration order.
    for result_col in sorted(result_cols):
        if not result_col.startswith(prefix):
            continue
        if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
            # Ignore computing variance on things like that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"] ** 0.5

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = f"{result_col}.p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
    return augmented_results_df

215 

216 

def limit_top_n_configs(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
    top_n_configs: int = 10,
    method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
) -> Tuple[pandas.DataFrame, List[int], Dict[str, bool]]:
    # pylint: disable=too-many-locals
    """
    Utility function to process the results and determine the best performing configs
    including potential repeats to help assess variability.

    Parameters
    ----------
    exp_data : Optional[ExperimentData]
        The ExperimentData (e.g., obtained from the storage layer) to operate on.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use
        :py:attr:`.ExperimentData.results_df` property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Which result column(s) to use for sorting the configs, and in which
        direction ("min" or "max").
        By default None to automatically select the :py:attr:`.ExperimentData.objectives`.
    top_n_configs : int
        How many configs to return, including the default, by default 10.
    method: Literal["mean", "median", "p50", "p75", "p90", "p95", "p99"] = "mean",
        Which statistical method to use when sorting the config groups before
        determining the cutoff, by default "mean".
        "median" is treated as an alias for "p50".

    Returns
    -------
    (top_n_config_results_df, top_n_config_ids, orderby_cols) :
        Tuple[pandas.DataFrame, List[int], Dict[str, bool]]
        The filtered results dataframe, the config ids, and the columns used to
        order the configs.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported values.
    """
    # Do some input checking first.
    if method not in ["mean", "median", "p50", "p75", "p90", "p95", "p99"]:
        raise ValueError(f"Invalid method: {method}")
    # The stats columns are named by percentile (there is no ".median" column),
    # so resolve the "median" alias to the equivalent ".p50" column.
    stat_suffix = "p50" if method == "median" else method

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(
        exp_data,
        results_df=results_df,
        objectives=objectives,
    )
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose its mind for some reason and keeps forgetting that
    # results_df is not None and is in fact a DataFrame, so we periodically assert
    # it in this func for now.
    assert results_df is not None
    orderby_cols: Dict[str, bool] = {
        obj_col + f".{stat_suffix}": ascending for (obj_col, ascending) in objs_cols.items()
    }

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    if exp_data is not None:
        default_config_id = exp_data.default_tunable_config_id
    else:
        # Without ExperimentData, fall back to the config used by the earliest
        # trial. Look up that trial's *config* id: the trial id itself is not a
        # config id and must not be compared against the config id column.
        first_trial_config_ids = results_df.loc[
            results_df[trial_id_col] == results_df[trial_id_col].min(), config_id_col
        ]
        default_config_id = (
            first_trial_config_ids.iloc[0] if not first_trial_config_ids.empty else None
        )
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large.
    # But also make sure the default configs is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        # NOTE(review): for non-"mean" methods this mask keeps every group with
        # more than one trial (regardless of variance) rather than the
        # singletons - confirm that this is the intended behavior.
        if method == "mean":
            singletons_mask = results_df["tunable_config_trial_group_size"] == 1
        else:
            singletons_mask = results_df["tunable_config_trial_group_size"] > 1
        results_df = results_df.loc[
            (
                (results_df[f"{obj_col}.var_zscore"].abs() < 2)
                | (singletons_mask)
                | (results_df[config_id_col] == default_config_id)
            )
        ]
        assert results_df is not None

    # Also, filter results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for orderby_col, ascending in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[
        list(orderby_cols)
    ]
    top_n_config_ids: List[int] = (
        group_results_df.sort_values(
            by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())
        )
        .head(top_n_configs)
        .index.tolist()
    )

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results.
    # Sort by the group ids.
    top_n_config_results_df = results_df.loc[
        (results_df[config_id_col].isin(top_n_config_ids))
    ].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat(
        [default_config_results_df, top_n_config_results_df],
        axis=0,
    )
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)

347 

348 

def plot_optimizer_trends(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> None:
    """
    Plots the optimizer trends for the Experiment.

    One figure is shown per objective: a box plot of the results of each config
    trial group, overlaid with a line tracking the best group mean seen so far
    (the incumbent).

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional[pandas.DataFrame]
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    """
    (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

    for objective_column, ascending in obj_cols.items():
        incumbent_column = objective_column + ".incumbent"

        # Use the per-group mean so the trend line matches the box plots.
        #
        # Note: technically the optimizer (usually) uses the *first* result for a
        # given config trial group before moving on to a new config (x-axis), so
        # plotting the mean may be slightly misleading when trying to understand the
        # actual path taken by the optimizer in case of high variance samples.
        per_group_means = results_df.groupby(groupby_columns)[objective_column].mean()
        group_results_df = per_group_means.reset_index().sort_values(groupby_columns)

        # The incumbent is the running best: a cumulative min when minimizing,
        # a cumulative max otherwise.
        objective_series = group_results_df[objective_column]
        group_results_df[incumbent_column] = (
            objective_series.cummin() if ascending else objective_series.cummax()
        )

        (_fig, axis) = plt.subplots(figsize=(15, 5))

        # Spread of the trial results within each config trial group.
        sns.boxplot(data=results_df, x=groupby_column, y=objective_column, ax=axis)

        # Best-so-far trend across the config trial groups.
        axis = sns.lineplot(
            data=group_results_df,
            x=groupby_column,
            y=incumbent_column,
            alpha=0.7,
            label="Mean of Incumbent Config Trial Group",
            ax=axis,
        )

        plt.yscale("log")
        plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

        plt.xlabel("Config Trial Group ID, Config ID")
        plt.xticks(rotation=90, fontsize=8)

        # The title is left empty when no ExperimentData is available.
        if exp_data is not None:
            title = "Optimizer Trends for Experiment: " + exp_data.experiment_id
        else:
            title = ""
        plt.title(title)
        plt.grid()
        plt.show()

432 

433 

def plot_top_n_configs(
    exp_data: Optional[ExperimentData] = None,
    *,
    results_df: Optional[pandas.DataFrame] = None,
    objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
    with_scatter_plot: bool = False,
    **kwargs: Any,
) -> None:
    # pylint: disable=too-many-locals
    """
    Plots the top-N configs along with the default config for the given
    :py:class:`.ExperimentData`.

    Intended to be used from a Jupyter notebook.

    One violin plot figure is shown per ordering column returned by
    :py:func:`limit_top_n_configs`.

    Parameters
    ----------
    exp_data: ExperimentData
        The experiment data to plot.
    results_df : Optional[pandas.DataFrame]
        Optional results_df to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.results_df` property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to :py:attr:`.ExperimentData.objectives` property.
    with_scatter_plot : bool
        Whether to also add scatter plot to the output figure.
    kwargs : dict
        Remaining keyword arguments are passed along to the
        :py:func:`limit_top_n_configs` function.
    """
    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    # Keep only the kwargs that limit_top_n_configs accepts as keyword-only args.
    top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
    top_n_config_args.setdefault("results_df", results_df)
    top_n_config_args.setdefault("objectives", objectives)
    (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(
        exp_data=exp_data,
        **top_n_config_args,
    )

    (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(
        top_n_config_results_df,
    )
    # Do not count the default config among the "top N".
    top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

    for orderby_col, ascending in orderby_cols.items():
        opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
        (_fig, axis) = plt.subplots()
        sns.violinplot(
            data=top_n_config_results_df,
            x=groupby_column,
            y=orderby_col,
            ax=axis,
        )
        if with_scatter_plot:
            sns.scatterplot(
                data=top_n_config_results_df,
                x=groupby_column,
                y=orderby_col,
                legend=None,
                ax=axis,
            )
        plt.grid()
        (xticks, xlabels) = plt.xticks()
        # default should be in the first position based on limit_top_n_configs() return
        xlabels[0] = "default"  # type: ignore[call-overload]
        plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
        plt.xlabel("Config Trial Group, Config ID")
        plt.xticks(rotation=90)
        plt.ylabel(opt_tgt)
        plt.yscale("log")
        # Ascending order means the objective is minimized; otherwise it is
        # maximized and larger values are the better ones.
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()

509 plt.show()