Coverage for mlos_viz/mlos_viz/base.py: 90%

155 statements  


#
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
#
"""
Base functions for visualizing, explaining, and gaining insights from results.
"""

from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Tuple, Union

import re
import warnings

from importlib.metadata import version

from matplotlib import pyplot as plt
from packaging.version import Version  # used for robust version comparisons; ships as a matplotlib dependency
import pandas
from pandas.api.types import is_numeric_dtype
from pandas.core.groupby.generic import SeriesGroupBy
import seaborn as sns

from mlos_bench.storage.base_experiment_data import ExperimentData
from mlos_viz.util import expand_results_data_args


_SEABORN_VERS = version('seaborn')


def _get_kwarg_defaults(target: Callable, **kwargs: Any) -> Dict[str, Any]:
    """
    Assembles a smaller kwargs dict for the specified target function.

    Note: this only works with non-positional kwargs (e.g., those after a * arg).
    """
    target_kwargs = {}
    # Note: __kwdefaults__ is None when the target has no keyword-only arguments,
    # so fall back to an empty dict to avoid iterating over None.
    for kword in target.__kwdefaults__ or {}:
        if kword in kwargs:
            target_kwargs[kword] = kwargs[kword]
    return target_kwargs
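# A minimal usage sketch (the `_demo` function below is hypothetical, not part of
# this module): only keyword-only parameters of the target that also appear in
# the supplied kwargs are kept; unknown keys are silently dropped.
#
#   >>> def _demo(*, alpha: int = 1, beta: int = 2) -> None:
#   ...     pass
#   >>> _get_kwarg_defaults(_demo, alpha=10, gamma=3)
#   {'alpha': 10}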


def ignore_plotter_warnings() -> None:
    """
    Suppress some annoying warnings from third-party data visualization packages by
    adding them to the warnings filter.
    """
    warnings.filterwarnings("ignore", category=FutureWarning)
    # Compare parsed versions rather than raw strings to avoid lexicographic pitfalls.
    if Version(_SEABORN_VERS) <= Version('0.13.1'):
        # This warning is raised via seaborn, but actually comes from pandas.
        warnings.filterwarnings("ignore", category=DeprecationWarning, module="seaborn",
                                message="is_categorical_dtype is deprecated and will be removed in a future version.")
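# Typical usage (a sketch): call once at the top of a notebook session, before
# any of the plotting functions below.
#
#   >>> ignore_plotter_warnings()
#   >>> plot_optimizer_trends(exp_data)   # no FutureWarning noise in the output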


def _add_groupby_desc_column(results_df: pandas.DataFrame,
                             groupby_columns: Optional[List[str]] = None,
                             ) -> Tuple[pandas.DataFrame, List[str], str]:
    """
    Adds a group descriptor column to the results_df.

    Parameters
    ----------
    results_df : pandas.DataFrame
        The results dataframe to add the descriptor column to.
    groupby_columns : Optional[List[str]]
        The columns to concatenate into the new descriptor column, by default
        ["tunable_config_trial_group_id", "tunable_config_id"].
    """
    # Compose a new groupby_column for display purposes that is the
    # concatenation of the min trial_id (the first one) of each config trial
    # group and the config_id.
    # Note: it needs to be a string (e.g., categorical) for boxplot and lineplot
    # to be on the same axis anyways.
    if groupby_columns is None:
        groupby_columns = ["tunable_config_trial_group_id", "tunable_config_id"]
    groupby_column = ",".join(groupby_columns)
    results_df[groupby_column] = results_df[groupby_columns].astype(str).apply(
        lambda x: ",".join(x), axis=1)  # pylint: disable=unnecessary-lambda
    groupby_columns.append(groupby_column)
    return (results_df, groupby_columns, groupby_column)
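# A small sketch of the descriptor it produces: with the default groupby_columns,
# a row with tunable_config_trial_group_id=5 and tunable_config_id=2 gets "5,2".
#
#   >>> df = pandas.DataFrame({"tunable_config_trial_group_id": [5], "tunable_config_id": [2]})
#   >>> (df, cols, col) = _add_groupby_desc_column(df)
#   >>> col
#   'tunable_config_trial_group_id,tunable_config_id'
#   >>> df[col].tolist()
#   ['5,2']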



def augment_results_df_with_config_trial_group_stats(exp_data: Optional[ExperimentData] = None,
                                                     *,
                                                     results_df: Optional[pandas.DataFrame] = None,
                                                     requested_result_cols: Optional[Iterable[str]] = None,
                                                     ) -> pandas.DataFrame:
    # pylint: disable=too-complex
    """
    Add a number of useful statistical measure columns to the results dataframe.

    In particular, for each requested numeric result column, we add the following
    columns:

    - ".p50": the median of each config trial group's results

    - ".p75": the p75 of each config trial group's results

    - ".p90": the p90 of each config trial group's results

    - ".p95": the p95 of each config trial group's results

    - ".p99": the p99 of each config trial group's results

    - ".mean": the mean of each config trial group's results

    - ".stddev": the standard deviation of each config trial group's results

    - ".var": the variance of each config trial group's results

    - ".var_zscore": the zscore of this group's variance relative to the mean and
      stddev of all groups' variances. This can be useful for filtering out
      outliers: e.g., restricting to abs < 2 removes configs whose variance is
      more than two standard deviations from the mean variance across all config
      trial groups.

    Additionally, we add a "tunable_config_trial_group_size" column that indicates
    the number of trials using a particular config.

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use the results_df property.
    requested_result_cols : Optional[Iterable[str]]
        Which results columns to augment, by default None to use all results columns
        that look numeric.

    Returns
    -------
    pandas.DataFrame
        The augmented results dataframe.
    """
    if results_df is None:
        if exp_data is None:
            raise ValueError("Either exp_data or results_df must be provided.")
        results_df = exp_data.results_df
    results_groups = results_df.groupby("tunable_config_id")
    if len(results_groups) <= 1:
        raise ValueError(f"Not enough data: {len(results_groups)}")

    if requested_result_cols is None:
        result_cols = set(col for col in results_df.columns
                          if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX))
    else:
        result_cols = set(col for col in requested_result_cols
                          if col.startswith(ExperimentData.RESULT_COLUMN_PREFIX) and col in results_df.columns)
        # Also accept requested columns given without the result prefix.
        result_cols.update(set(ExperimentData.RESULT_COLUMN_PREFIX + col for col in requested_result_cols
                               if ExperimentData.RESULT_COLUMN_PREFIX + col in results_df.columns))

    def compute_zscore_for_group_agg(
            results_groups_perf: "SeriesGroupBy",
            stats_df: pandas.DataFrame,
            result_col: str,
            agg: Union[Literal["mean"], Literal["var"], Literal["std"]]
    ) -> None:
        results_groups_perf_aggs = results_groups_perf.agg(agg)  # TODO: avoid recalculating?
        # Compute the zscore of the chosen aggregate performance of each group into each row in the dataframe.
        stats_df[result_col + f".{agg}_mean"] = results_groups_perf_aggs.mean()
        stats_df[result_col + f".{agg}_stddev"] = results_groups_perf_aggs.std()
        stats_df[result_col + f".{agg}_zscore"] = \
            (stats_df[result_col + f".{agg}"] - stats_df[result_col + f".{agg}_mean"]) \
            / stats_df[result_col + f".{agg}_stddev"]
        # Drop the intermediate columns; only the zscore itself is kept.
        stats_df.drop(columns=[result_col + f".{agg}_{stat}" for stat in ("mean", "stddev")], inplace=True)
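    # A worked sketch of the zscore above: if three config groups have variances
    # [1.0, 4.0, 7.0], then mean(var) = 4.0 and (sample) std(var) = 3.0, so rows
    # in the group with variance 7.0 get a ".var_zscore" of (7.0 - 4.0) / 3.0 = 1.0.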

    augmented_results_df = results_df
    augmented_results_df["tunable_config_trial_group_size"] = results_groups["trial_id"].transform("count")
    for result_col in result_cols:
        if not result_col.startswith(ExperimentData.RESULT_COLUMN_PREFIX):
            continue
        if re.search(r"(start|end).*time", result_col, flags=re.IGNORECASE):
            # Skip computing variance on things that look like timestamps.
            continue
        if not is_numeric_dtype(results_df[result_col]):
            continue
        if results_df[result_col].unique().size == 1:
            continue
        results_groups_perf = results_groups[result_col]
        stats_df = pandas.DataFrame()
        stats_df[result_col + ".mean"] = results_groups_perf.transform("mean", numeric_only=True)
        stats_df[result_col + ".var"] = results_groups_perf.transform("var")
        stats_df[result_col + ".stddev"] = stats_df[result_col + ".var"].apply(lambda x: x**0.5)

        compute_zscore_for_group_agg(results_groups_perf, stats_df, result_col, "var")
        quantiles = [0.50, 0.75, 0.90, 0.95, 0.99]
        for quantile in quantiles:  # TODO: can we do this in one pass?
            quantile_col = result_col + f".p{int(quantile * 100)}"
            stats_df[quantile_col] = results_groups_perf.transform("quantile", quantile)
        augmented_results_df = pandas.concat([augmented_results_df, stats_df], axis=1)
    return augmented_results_df
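# Typical usage (a sketch, assuming `exp_data` was loaded from the storage layer;
# "result.score" below stands in for whatever numeric result columns your
# experiment actually has, prefixed by ExperimentData.RESULT_COLUMN_PREFIX):
#
#   >>> stats_df = augment_results_df_with_config_trial_group_stats(exp_data)
#   >>> stats_df[["tunable_config_id", "result.score.mean", "result.score.p95"]].head()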



def limit_top_n_configs(exp_data: Optional[ExperimentData] = None,
                        *,
                        results_df: Optional[pandas.DataFrame] = None,
                        objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
                        top_n_configs: int = 10,
                        method: Literal["mean", "p50", "p75", "p90", "p95", "p99"] = "mean",
                        ) -> Tuple[pandas.DataFrame, List[int], Dict[str, bool]]:
    # pylint: disable=too-many-locals
    """
    Utility function to process the results and determine the best performing
    configs, including potential repeats, to help assess variability.

    Parameters
    ----------
    exp_data : Optional[ExperimentData]
        The ExperimentData (e.g., obtained from the storage layer) to operate on.
    results_df : Optional[pandas.DataFrame]
        The results dataframe to augment, by default None to use the results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Which result column(s) to use for sorting the configs, and in which
        direction ("min" or "max"), by default None to automatically select the
        experiment objectives.
    top_n_configs : int, optional
        How many configs to return, including the default, by default 10.
    method : Literal["mean", "p50", "p75", "p90", "p95", "p99"], optional
        Which statistical method to use when sorting the config groups before
        determining the cutoff, by default "mean".

    Returns
    -------
    (top_n_config_results_df, top_n_config_ids, orderby_cols) : Tuple[pandas.DataFrame, List[int], Dict[str, bool]]
        The filtered results dataframe, the config ids, and the columns used to order the configs.
    """
    # Do some input checking first.
    # Note: only these methods produce matching columns in
    # augment_results_df_with_config_trial_group_stats() above.
    if method not in ["mean", "p50", "p75", "p90", "p95", "p99"]:
        raise ValueError(f"Invalid method: {method}")

    # Prepare the orderby columns.
    (results_df, objs_cols) = expand_results_data_args(exp_data, results_df=results_df, objectives=objectives)
    assert isinstance(results_df, pandas.DataFrame)

    # Augment the results dataframe with some useful stats.
    results_df = augment_results_df_with_config_trial_group_stats(
        exp_data=exp_data,
        results_df=results_df,
        requested_result_cols=objs_cols.keys(),
    )
    # Note: mypy seems to lose track of the fact that results_df is not None and
    # is in fact a DataFrame, so we periodically assert it in this func for now.
    assert results_df is not None
    orderby_cols: Dict[str, bool] = {obj_col + f".{method}": ascending for (obj_col, ascending) in objs_cols.items()}

    config_id_col = "tunable_config_id"
    group_id_col = "tunable_config_trial_group_id"  # first trial_id per config group
    trial_id_col = "trial_id"

    default_config_id = results_df[trial_id_col].min() if exp_data is None else exp_data.default_tunable_config_id
    assert default_config_id is not None, "Failed to determine default config id."

    # Filter out configs whose variance is too large.
    # But also make sure the default config is still in the resulting dataframe
    # (for comparison purposes).
    for obj_col in objs_cols:
        assert results_df is not None
        if method == "mean":
            singletons_mask = results_df["tunable_config_trial_group_size"] == 1
        else:
            singletons_mask = results_df["tunable_config_trial_group_size"] > 1
        results_df = results_df.loc[(
            (results_df[f"{obj_col}.var_zscore"].abs() < 2)
            | (singletons_mask)
            | (results_df[config_id_col] == default_config_id)
        )]
    assert results_df is not None

    # Also, filter out results that are worse than the default.
    default_config_results_df = results_df.loc[results_df[config_id_col] == default_config_id]
    for (orderby_col, ascending) in orderby_cols.items():
        default_vals = default_config_results_df[orderby_col].unique()
        assert len(default_vals) == 1
        default_val = default_vals[0]
        assert results_df is not None
        if ascending:
            results_df = results_df.loc[(results_df[orderby_col] <= default_val)]
        else:
            results_df = results_df.loc[(results_df[orderby_col] >= default_val)]

    # Now regroup and filter to the top-N configs by their group performance dimensions.
    assert results_df is not None
    group_results_df: pandas.DataFrame = results_df.groupby(config_id_col).first()[orderby_cols.keys()]
    top_n_config_ids: List[int] = group_results_df.sort_values(
        by=list(orderby_cols.keys()), ascending=list(orderby_cols.values())).head(top_n_configs).index.tolist()

    # Remove the default config if it's included. We'll add it back later.
    if default_config_id in top_n_config_ids:
        top_n_config_ids.remove(default_config_id)
    # Get just the top-n config results, sorted by the group ids.
    top_n_config_results_df = results_df.loc[(
        results_df[config_id_col].isin(top_n_config_ids)
    )].sort_values([group_id_col, config_id_col, trial_id_col])
    # Place the default config at the top of the list.
    top_n_config_ids.insert(0, default_config_id)
    top_n_config_results_df = pandas.concat([default_config_results_df, top_n_config_results_df], axis=0)
    return (top_n_config_results_df, top_n_config_ids, orderby_cols)
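# Typical usage (a sketch): keep the 5 best config groups by their p90
# performance, then inspect or plot them.
#
#   >>> (top_df, top_ids, orderby_cols) = limit_top_n_configs(exp_data, top_n_configs=5, method="p90")
#   >>> top_ids[0] == exp_data.default_tunable_config_id   # default config is always first
#   True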



def plot_optimizer_trends(
        exp_data: Optional[ExperimentData] = None,
        *,
        results_df: Optional[pandas.DataFrame] = None,
        objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
) -> None:
    """
    Plots the optimizer trends for the Experiment.

    Parameters
    ----------
    exp_data : ExperimentData
        The ExperimentData (e.g., obtained from the storage layer) to plot.
    results_df : Optional[pandas.DataFrame]
        Optional results_df to plot.
        If not provided, defaults to exp_data.results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to exp_data.objectives property.
    """
    (results_df, obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    (results_df, groupby_columns, groupby_column) = _add_groupby_desc_column(results_df)

    for (objective_column, ascending) in obj_cols.items():
        incumbent_column = objective_column + ".incumbent"

        # Determine the mean of each config trial group to match the box plots.
        group_results_df = results_df.groupby(groupby_columns)[objective_column].mean()\
            .reset_index().sort_values(groupby_columns)
        #
        # Note: technically the optimizer (usually) uses the *first* result for a
        # given config trial group before moving on to a new config (x-axis), so
        # plotting the mean may be slightly misleading when trying to understand the
        # actual path taken by the optimizer in case of high variance samples.
        # Here's a way to do that, though it can also be misleading if the optimizer
        # later gets a worse value for that config group as well.
        #
        # group_results_df = results_df.sort_values(groupby_columns + ["trial_id"]).groupby(
        #     groupby_columns).head(1)[groupby_columns + [objective_column]].reset_index()

        # Calculate the incumbent (best seen so far).
        if ascending:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummin()
        else:
            group_results_df[incumbent_column] = group_results_df[objective_column].cummax()

        (_fig, axis) = plt.subplots(figsize=(15, 5))

        # Result of each set of trials for a config.
        sns.boxplot(
            data=results_df,
            x=groupby_column,
            y=objective_column,
            ax=axis,
        )

        # Results of the best seen so far.
        axis = sns.lineplot(
            data=group_results_df,
            x=groupby_column,
            y=incumbent_column,
            alpha=0.7,
            label="Mean of Incumbent Config Trial Group",
            ax=axis,
        )

        plt.yscale('log')
        plt.ylabel(objective_column.replace(ExperimentData.RESULT_COLUMN_PREFIX, ""))

        plt.xlabel("Config Trial Group ID, Config ID")
        plt.xticks(rotation=90, fontsize=8)

        # Note: parenthesize the conditional so a missing exp_data only drops the
        # experiment id suffix rather than blanking the whole title.
        plt.title("Optimizer Trends" + (f" for Experiment: {exp_data.experiment_id}" if exp_data is not None else ""))
        plt.grid()
        plt.show()  # type: ignore[no-untyped-call]
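# Typical notebook usage (a sketch, assuming `exp_data` is an ExperimentData
# instance obtained from the storage layer, e.g., via storage.experiments[...]):
#
#   >>> ignore_plotter_warnings()
#   >>> plot_optimizer_trends(exp_data)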



def plot_top_n_configs(exp_data: Optional[ExperimentData] = None,
                       *,
                       results_df: Optional[pandas.DataFrame] = None,
                       objectives: Optional[Dict[str, Literal["min", "max"]]] = None,
                       with_scatter_plot: bool = False,
                       **kwargs: Any,
                       ) -> None:
    # pylint: disable=too-many-locals
    """
    Plots the top-N configs along with the default config for the given ExperimentData.

    Intended to be used from a Jupyter notebook.

    Parameters
    ----------
    exp_data : ExperimentData
        The experiment data to plot.
    results_df : Optional[pandas.DataFrame]
        Optional results_df to plot.
        If not provided, defaults to exp_data.results_df property.
    objectives : Optional[Dict[str, Literal["min", "max"]]]
        Optional objectives to plot.
        If not provided, defaults to exp_data.objectives property.
    with_scatter_plot : bool
        Whether to also add a scatter plot to the output figure.
    kwargs : dict
        Remaining keyword arguments are passed along to the limit_top_n_configs function.
    """
    (results_df, _obj_cols) = expand_results_data_args(exp_data, results_df, objectives)
    top_n_config_args = _get_kwarg_defaults(limit_top_n_configs, **kwargs)
    if "results_df" not in top_n_config_args:
        top_n_config_args["results_df"] = results_df
    if "objectives" not in top_n_config_args:
        top_n_config_args["objectives"] = objectives
    (top_n_config_results_df, _top_n_config_ids, orderby_cols) = limit_top_n_configs(exp_data=exp_data, **top_n_config_args)

    (top_n_config_results_df, _groupby_columns, groupby_column) = _add_groupby_desc_column(top_n_config_results_df)
    top_n = len(top_n_config_results_df[groupby_column].unique()) - 1

    for (orderby_col, ascending) in orderby_cols.items():
        opt_tgt = orderby_col.replace(ExperimentData.RESULT_COLUMN_PREFIX, "")
        (_fig, axis) = plt.subplots()
        sns.violinplot(
            data=top_n_config_results_df,
            x=groupby_column,
            y=orderby_col,
            ax=axis,
        )
        if with_scatter_plot:
            sns.scatterplot(
                data=top_n_config_results_df,
                x=groupby_column,
                y=orderby_col,
                legend=None,
                ax=axis,
            )
        plt.grid()
        (xticks, xlabels) = plt.xticks()
        # The default config should be in the first position based on the
        # limit_top_n_configs() return ordering.
        xlabels[0] = "default"  # type: ignore[call-overload]
        plt.xticks(xticks, xlabels)  # type: ignore[arg-type]
        plt.xlabel("Config Trial Group, Config ID")
        plt.xticks(rotation=90)
        plt.ylabel(opt_tgt)
        plt.yscale('log')
        extra_title = "(lower is better)" if ascending else "(higher is better)"
        plt.title(f"Top {top_n} configs {opt_tgt} {extra_title}")
        plt.show()  # type: ignore[no-untyped-call]
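# Typical notebook usage (a sketch; the extra kwargs here are forwarded to
# limit_top_n_configs() via _get_kwarg_defaults()):
#
#   >>> ignore_plotter_warnings()
#   >>> plot_top_n_configs(exp_data, with_scatter_plot=True, top_n_configs=5, method="p90")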