Coverage for mlos_core/mlos_core/data_classes.py: 93%

157 statements  

« prev     ^ index     » next       coverage.py v7.9.2, created at 2025-07-14 00:55 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6Data classes for ``mlos_core`` used to pass around configurations, observations, and 

7suggestions. 

8 

9``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame` 

10s and :external:py:class:`~pandas.Series` to represent configurations and scores and 

11context (information about where the configuration was evaluated). 

12 

13These modules encapsulate tuples of those for easier passing around and manipulation. 

14""" 

15from collections.abc import Iterable, Iterator 

16from typing import Any 

17 

18import pandas as pd 

19from ConfigSpace import Configuration, ConfigurationSpace 

20 

21from mlos_core.util import compare_optional_dataframe, compare_optional_series 

22 

23 

24class Observation: 

25 """A single observation of a configuration.""" 

26 

27 def __init__( 

28 self, 

29 *, 

30 config: pd.Series, 

31 score: pd.Series = pd.Series(), 

32 context: pd.Series | None = None, 

33 metadata: pd.Series | None = None, 

34 ): 

35 """ 

36 Creates a new Observation object. 

37 

38 Parameters 

39 ---------- 

40 config : pandas.Series 

41 The configuration observed. 

42 score : pandas.Series 

43 The score metrics observed. 

44 context : pandas.Series | None 

45 The context in which the configuration was evaluated. 

46 Not Yet Implemented. 

47 metadata: pandas.Series | None 

48 The metadata in which the configuration was evaluated 

49 """ 

50 self._config = config 

51 self._score = score 

52 self._context = context 

53 self._metadata = metadata 

54 

55 @property 

56 def config(self) -> pd.Series: 

57 """Gets (a copy of) the config of the Observation.""" 

58 return self._config.copy() 

59 

60 @property 

61 def score(self) -> pd.Series: 

62 """Gets (a copy of) the score of the Observation.""" 

63 return self._score.copy() 

64 

65 @property 

66 def context(self) -> pd.Series | None: 

67 """Gets (a copy of) the context of the Observation.""" 

68 return self._context.copy() if self._context is not None else None 

69 

70 @property 

71 def metadata(self) -> pd.Series | None: 

72 """Gets (a copy of) the metadata of the Observation.""" 

73 return self._metadata.copy() if self._metadata is not None else None 

74 

75 def to_suggestion(self) -> "Suggestion": 

76 """ 

77 Converts the observation to a suggestion. 

78 

79 Returns 

80 ------- 

81 Suggestion 

82 The suggestion. 

83 """ 

84 return Suggestion( 

85 config=self.config, 

86 context=self.context, 

87 metadata=self.metadata, 

88 ) 

89 

90 def __repr__(self) -> str: 

91 return ( 

92 f"Observation(config={self._config}, score={self._score}, " 

93 f"context={self._context}, metadata={self._metadata})" 

94 ) 

95 

96 def __eq__(self, other: Any) -> bool: 

97 if not isinstance(other, Observation): 

98 return False 

99 

100 if not self._config.equals(other._config): 

101 return False 

102 if not self._score.equals(other._score): 

103 return False 

104 if not compare_optional_series(self._context, other._context): 

105 return False 

106 if not compare_optional_series(self._metadata, other._metadata): 

107 return False 

108 

109 return True 

110 

111 def __ne__(self, other: Any) -> bool: 

112 return not self.__eq__(other) 

113 

114 

115class Observations: 

116 """A set of observations of a configuration scores.""" 

117 

118 def __init__( # pylint: disable=too-many-arguments 

119 self, 

120 *, 

121 configs: pd.DataFrame = pd.DataFrame(), 

122 scores: pd.DataFrame = pd.DataFrame(), 

123 contexts: pd.DataFrame | None = None, 

124 metadata: pd.DataFrame | None = None, 

125 observations: Iterable[Observation] | None = None, 

126 ): 

127 """ 

128 Creates a new Observation object. 

129 

130 Can accept either a set of Observations or a collection of aligned config and 

131 score (and optionally context) dataframes. 

132 

133 If both are provided the two sets will be merged. 

134 

135 Parameters 

136 ---------- 

137 configs : pandas.DataFrame 

138 Pandas dataframe containing configurations. Column names are the parameter names. 

139 scores : pandas.DataFrame 

140 The score metrics observed in a dataframe. 

141 contexts : pandas.DataFrame | None 

142 The context in which the configuration was evaluated. 

143 Not Yet Implemented. 

144 metadata: pandas.DataFrame | None 

145 The metadata in which the configuration was evaluated 

146 Not Yet Implemented. 

147 """ 

148 if observations is None: 

149 observations = [] 

150 if any(observations): 

151 configs = pd.concat([obs.config.to_frame().T for obs in observations]) 

152 scores = pd.concat([obs.score.to_frame().T for obs in observations]) 

153 

154 if sum(obs.context is None for obs in observations) == 0: 

155 contexts = pd.concat( 

156 [obs.context.to_frame().T for obs in observations] # type: ignore[union-attr] 

157 ) 

158 else: 

159 contexts = None 

160 if sum(obs.metadata is None for obs in observations) == 0: 

161 metadata = pd.concat( 

162 [obs.metadata.to_frame().T for obs in observations] # type: ignore[union-attr] 

163 ) 

164 else: 

165 metadata = None 

166 assert len(configs.index) == len( 

167 scores.index 

168 ), "config and score must have the same length" 

169 if contexts is not None: 

170 assert len(configs.index) == len( 

171 contexts.index 

172 ), "config and context must have the same length" 

173 if metadata is not None: 

174 assert len(configs.index) == len( 

175 metadata.index 

176 ), "config and metadata must have the same length" 

177 self._configs = configs.reset_index(drop=True) 

178 self._scores = scores.reset_index(drop=True) 

179 self._contexts = None if contexts is None else contexts.reset_index(drop=True) 

180 self._metadata = None if metadata is None else metadata.reset_index(drop=True) 

181 

182 @property 

183 def configs(self) -> pd.DataFrame: 

184 """Gets a copy of the configs of the Observations.""" 

185 return self._configs.copy() 

186 

187 @property 

188 def scores(self) -> pd.DataFrame: 

189 """Gets a copy of the scores of the Observations.""" 

190 return self._scores.copy() 

191 

192 @property 

193 def contexts(self) -> pd.DataFrame | None: 

194 """Gets a copy of the contexts of the Observations.""" 

195 return self._contexts.copy() if self._contexts is not None else None 

196 

197 @property 

198 def metadata(self) -> pd.DataFrame | None: 

199 """Gets a copy of the metadata of the Observations.""" 

200 return self._metadata.copy() if self._metadata is not None else None 

201 

202 def filter_by_index(self, index: pd.Index) -> "Observations": 

203 """ 

204 Filters the observation by the given indices. 

205 

206 Parameters 

207 ---------- 

208 index : pandas.Index 

209 The indices to filter by. 

210 

211 Returns 

212 ------- 

213 Observation 

214 The filtered observation. 

215 """ 

216 return Observations( 

217 configs=self._configs.loc[index].copy(), 

218 scores=self._scores.loc[index].copy(), 

219 contexts=None if self._contexts is None else self._contexts.loc[index].copy(), 

220 metadata=None if self._metadata is None else self._metadata.loc[index].copy(), 

221 ) 

222 

223 def append(self, observation: Observation) -> None: 

224 """ 

225 Appends the given observation to this observation. 

226 

227 Parameters 

228 ---------- 

229 observation : Observation 

230 The observation to append. 

231 """ 

232 config = observation.config.to_frame().T 

233 score = observation.score.to_frame().T 

234 context = None if observation.context is None else observation.context.to_frame().T 

235 metadata = None if observation.metadata is None else observation.metadata.to_frame().T 

236 if len(self._configs.index) == 0: 

237 self._configs = config 

238 self._scores = score 

239 self._contexts = context 

240 self._metadata = metadata 

241 assert set(self.configs.index) == set( 

242 self.scores.index 

243 ), "config and score must have the same index" 

244 return 

245 

246 self._configs = pd.concat([self._configs, config]).reset_index(drop=True) 

247 self._scores = pd.concat([self._scores, score]).reset_index(drop=True) 

248 assert set(self.configs.index) == set( 

249 self.scores.index 

250 ), "config and score must have the same index" 

251 

252 if self._contexts is not None: 

253 assert context is not None, ( 

254 "context of appending observation must not be null " 

255 "if context of prior observation is not null" 

256 ) 

257 self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True) 

258 assert self._configs.index.equals( 

259 self._contexts.index 

260 ), "config and context must have the same index" 

261 else: 

262 assert context is None, ( 

263 "context of appending observation must be null " 

264 "if context of prior observation is null" 

265 ) 

266 if self._metadata is not None: 

267 assert metadata is not None, ( 

268 "context of appending observation must not be null " 

269 "if metadata of prior observation is not null" 

270 ) 

271 self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True) 

272 assert self._configs.index.equals( 

273 self._metadata.index 

274 ), "config and metadata must have the same index" 

275 else: 

276 assert metadata is None, ( 

277 "context of appending observation must be null " 

278 "if metadata of prior observation is null" 

279 ) 

280 

281 def __len__(self) -> int: 

282 return len(self._configs.index) 

283 

284 def __iter__(self) -> Iterator["Observation"]: 

285 for idx in self._configs.index: 

286 config = self._configs.loc[idx] 

287 assert isinstance(config, pd.Series) 

288 score = self._scores.loc[idx] 

289 assert isinstance(score, pd.Series) 

290 context = self._contexts.loc[idx] if self._contexts is not None else None 

291 assert isinstance(context, (pd.Series, type(None))) 

292 metadata = self._metadata.loc[idx] if self._metadata is not None else None 

293 assert isinstance(metadata, (pd.Series, type(None))) 

294 yield Observation( 

295 config=config, 

296 score=score, 

297 context=context, 

298 metadata=metadata, 

299 ) 

300 

301 def __repr__(self) -> str: 

302 return ( 

303 f"Observation(configs={self._configs}, score={self._scores}, " 

304 "contexts={self._contexts}, metadata={self._metadata})" 

305 ) 

306 

307 def __eq__(self, other: Any) -> bool: 

308 if not isinstance(other, Observations): 

309 return False 

310 

311 if not self._configs.equals(other._configs): 

312 return False 

313 if not self._scores.equals(other._scores): 

314 return False 

315 if not compare_optional_dataframe(self._contexts, other._contexts): 

316 return False 

317 if not compare_optional_dataframe(self._metadata, other._metadata): 

318 return False 

319 

320 return True 

321 

322 # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq 

323 def __ne__(self, other: Any) -> bool: 

324 return not self.__eq__(other) 

325 

326 

327class Suggestion: 

328 """ 

329 A single suggestion for a configuration. 

330 

331 A Suggestion is an Observation that has not yet been scored. Evaluating the 

332 Suggestion and calling `complete(scores)` can convert it to an Observation. 

333 """ 

334 

335 def __init__( 

336 self, 

337 *, 

338 config: pd.Series, 

339 context: pd.Series | None = None, 

340 metadata: pd.Series | None = None, 

341 ): 

342 """ 

343 Creates a new Suggestion. 

344 

345 Parameters 

346 ---------- 

347 config : pandas.Series 

348 The configuration suggested. 

349 context : pandas.Series | None 

350 The context for this suggestion, by default None 

351 metadata : pandas.Series | None 

352 Any metadata provided by the underlying optimizer, by default None 

353 """ 

354 self._config = config 

355 self._context = context 

356 self._metadata = metadata 

357 

358 @property 

359 def config(self) -> pd.Series: 

360 """Gets (a copy of) the config of the Suggestion.""" 

361 return self._config.copy() 

362 

363 @property 

364 def context(self) -> pd.Series | None: 

365 """Gets (a copy of) the context of the Suggestion.""" 

366 return self._context.copy() if self._context is not None else None 

367 

368 @property 

369 def metadata(self) -> pd.Series | None: 

370 """Gets (a copy of) the metadata of the Suggestion.""" 

371 return self._metadata.copy() if self._metadata is not None else None 

372 

373 def complete(self, score: pd.Series) -> Observation: 

374 """ 

375 Completes the Suggestion by adding a score to turn it into an Observation. 

376 

377 Parameters 

378 ---------- 

379 score : pandas.Series 

380 The score metrics observed. 

381 

382 Returns 

383 ------- 

384 Observation 

385 The observation of the suggestion. 

386 """ 

387 return Observation( 

388 config=self.config, 

389 score=score, 

390 context=self.context, 

391 metadata=self.metadata, 

392 ) 

393 

394 def to_configspace_config(self, space: ConfigurationSpace) -> Configuration: 

395 """ 

396 Convert a Configuration Space to a Configuration. 

397 

398 Parameters 

399 ---------- 

400 space : ConfigSpace.ConfigurationSpace 

401 The ConfigurationSpace to be converted. 

402 

403 Returns 

404 ------- 

405 ConfigSpace.Configuration 

406 The output Configuration. 

407 """ 

408 return Configuration(space, values=self._config.dropna().to_dict()) 

409 

410 def __repr__(self) -> str: 

411 return ( 

412 f"Suggestion(config={self._config}, context={self._context}, " 

413 "metadata={self._metadata})" 

414 ) 

415 

416 def __eq__(self, other: Any) -> bool: 

417 if not isinstance(other, Suggestion): 

418 return False 

419 

420 if not self._config.equals(other._config): 

421 return False 

422 if not compare_optional_series(self._context, other._context): 

423 return False 

424 if not compare_optional_series(self._metadata, other._metadata): 

425 return False 

426 

427 return True 

428 

429 def __ne__(self, other: Any) -> bool: 

430 return not self.__eq__(other)