Coverage for mlos_core/mlos_core/data

2# Copyright (c) Microsoft Corporation.

3# Licensed under the MIT License.

5"""

6Data classes for ``mlos_core`` used to pass around configurations, observations, and

7suggestions.

9``mlos_core`` uses :external:py:mod:`pandas` :external:py:class:`~pandas.DataFrame`

10s and :external:py:class:`~pandas.Series` to represent configurations and scores and

11context (information about where the configuration was evaluated).

13These modules encapsulate tuples of those for easier passing around and manipulation.

14"""

15from typing import Any, Iterable, Iterator, Optional

17import pandas as pd

18from ConfigSpace import Configuration, ConfigurationSpace

20from mlos_core.util import compare_optional_dataframe, compare_optional_series

class Observation:
    """
    A single observation of a configuration.

    Holds the evaluated configuration together with the score(s) observed for it
    and, optionally, the context and metadata of the evaluation.
    """

    def __init__(
        self,
        *,
        config: pd.Series,
        score: Optional[pd.Series] = None,
        context: Optional[pd.Series] = None,
        metadata: Optional[pd.Series] = None,
    ):
        """
        Creates a new Observation object.

        Parameters
        ----------
        config : pandas.Series
            The configuration observed.
        score : Optional[pandas.Series]
            The score metrics observed. When omitted (or None) a new, empty
            Series is created for this instance.
        context : Optional[pandas.Series]
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata: Optional[pandas.Series]
            The metadata in which the configuration was evaluated
        """
        self._config = config
        # The empty default is built per call instead of in the signature so that
        # instances never share one module-level Series object.
        self._score = pd.Series() if score is None else score
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Observation."""
        return self._config.copy()

    @property
    def score(self) -> pd.Series:
        """Gets (a copy of) the score of the Observation."""
        return self._score.copy()

    @property
    def context(self) -> Optional[pd.Series]:
        """Gets (a copy of) the context of the Observation."""
        return self._context.copy() if self._context is not None else None

    @property
    def metadata(self) -> Optional[pd.Series]:
        """Gets (a copy of) the metadata of the Observation."""
        return self._metadata.copy() if self._metadata is not None else None

    def to_suggestion(self) -> "Suggestion":
        """
        Converts the observation to a suggestion.

        The score is dropped; config, context and metadata are passed on as copies.

        Returns
        -------
        Suggestion
            The suggestion.
        """
        return Suggestion(
            config=self.config,
            context=self.context,
            metadata=self.metadata,
        )

    def __repr__(self) -> str:
        return (
            f"Observation(config={self._config}, score={self._score}, "
            f"context={self._context}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two observations are equal when all four of their fields are equal."""
        if not isinstance(other, Observation):
            return False
        # Short-circuits in the same order as the field-by-field checks:
        # config, score, context, metadata.
        return bool(
            self._config.equals(other._config)
            and self._score.equals(other._score)
            and compare_optional_series(self._context, other._context)
            and compare_optional_series(self._metadata, other._metadata)
        )

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

112

113

class Observations:
    """A set of observations of configuration scores."""

    def __init__(  # pylint: disable=too-many-arguments
        self,
        *,
        # The DataFrame defaults are never mutated: reset_index() below always
        # produces new frames before anything is stored on the instance.
        configs: pd.DataFrame = pd.DataFrame(),
        scores: pd.DataFrame = pd.DataFrame(),
        contexts: Optional[pd.DataFrame] = None,
        metadata: Optional[pd.DataFrame] = None,
        observations: Optional[Iterable["Observation"]] = None,
    ):
        """
        Creates a new Observations object.

        Can accept either a set of Observations or a collection of aligned config and
        score (and optionally context and metadata) dataframes.

        If a non-empty collection of ``observations`` is provided, the dataframes
        are built from it and the ``configs``, ``scores``, ``contexts`` and
        ``metadata`` arguments are ignored.

        Parameters
        ----------
        configs : pandas.DataFrame
            Pandas dataframe containing configurations. Column names are the parameter names.
        scores : pandas.DataFrame
            The score metrics observed in a dataframe.
        contexts : Optional[pandas.DataFrame]
            The context in which the configuration was evaluated.
            Not Yet Implemented.
        metadata: Optional[pandas.DataFrame]
            The metadata in which the configuration was evaluated
            Not Yet Implemented.
        observations : Optional[Iterable[Observation]]
            Individual observations to build the dataframes from. Contexts
            (respectively metadata) are kept only if every observation has one.
        """
        # Materialize once: the iterable may be a one-shot generator, and it is
        # walked several times below.
        obs_list = [] if observations is None else list(observations)
        if obs_list:
            configs = pd.concat([obs.config.to_frame().T for obs in obs_list])
            scores = pd.concat([obs.score.to_frame().T for obs in obs_list])

            obs_contexts = [obs.context for obs in obs_list]
            if all(ctx is not None for ctx in obs_contexts):
                contexts = pd.concat(
                    [ctx.to_frame().T for ctx in obs_contexts]  # type: ignore[union-attr]
                )
            else:
                contexts = None

            obs_metadata = [obs.metadata for obs in obs_list]
            if all(meta is not None for meta in obs_metadata):
                metadata = pd.concat(
                    [meta.to_frame().T for meta in obs_metadata]  # type: ignore[union-attr]
                )
            else:
                metadata = None

        assert len(configs.index) == len(
            scores.index
        ), "config and score must have the same length"
        if contexts is not None:
            assert len(configs.index) == len(
                contexts.index
            ), "config and context must have the same length"
        if metadata is not None:
            assert len(configs.index) == len(
                metadata.index
            ), "config and metadata must have the same length"

        # All frames share a fresh 0..n-1 index so rows line up positionally.
        self._configs = configs.reset_index(drop=True)
        self._scores = scores.reset_index(drop=True)
        self._contexts = None if contexts is None else contexts.reset_index(drop=True)
        self._metadata = None if metadata is None else metadata.reset_index(drop=True)

    @property
    def configs(self) -> pd.DataFrame:
        """Gets a copy of the configs of the Observations."""
        return self._configs.copy()

    @property
    def scores(self) -> pd.DataFrame:
        """Gets a copy of the scores of the Observations."""
        return self._scores.copy()

    @property
    def contexts(self) -> Optional[pd.DataFrame]:
        """Gets a copy of the contexts of the Observations."""
        return self._contexts.copy() if self._contexts is not None else None

    @property
    def metadata(self) -> Optional[pd.DataFrame]:
        """Gets a copy of the metadata of the Observations."""
        return self._metadata.copy() if self._metadata is not None else None

    def filter_by_index(self, index: pd.Index) -> "Observations":
        """
        Filters the observations by the given indices.

        Parameters
        ----------
        index : pandas.Index
            The indices to filter by.

        Returns
        -------
        Observations
            A new Observations object holding only the selected rows
            (re-indexed from zero by the constructor).
        """
        return Observations(
            configs=self._configs.loc[index].copy(),
            scores=self._scores.loc[index].copy(),
            contexts=None if self._contexts is None else self._contexts.loc[index].copy(),
            metadata=None if self._metadata is None else self._metadata.loc[index].copy(),
        )

    def append(self, observation: "Observation") -> None:
        """
        Appends the given observation to this set of observations.

        If this set is empty, the observation defines whether contexts and
        metadata are tracked. Otherwise the observation must have a context
        (metadata) exactly when the existing observations do.

        Parameters
        ----------
        observation : Observation
            The observation to append.

        Raises
        ------
        AssertionError
            If the presence of context or metadata does not match the existing
            observations. Nothing is modified in that case.
        """
        config = observation.config.to_frame().T
        score = observation.score.to_frame().T
        # Read each optional field once and reuse the value.
        obs_context = observation.context
        obs_metadata = observation.metadata
        context = None if obs_context is None else obs_context.to_frame().T
        metadata = None if obs_metadata is None else obs_metadata.to_frame().T

        if len(self._configs.index) == 0:
            # Reset the indexes so the single row is labelled 0 in every frame,
            # regardless of the names of the incoming Series.
            self._configs = config.reset_index(drop=True)
            self._scores = score.reset_index(drop=True)
            self._contexts = None if context is None else context.reset_index(drop=True)
            self._metadata = None if metadata is None else metadata.reset_index(drop=True)
            return

        # Validate everything before mutating, so a rejected observation cannot
        # leave configs/scores longer than contexts/metadata.
        if self._contexts is not None:
            assert context is not None, (
                "context of appending observation must not be null "
                "if context of prior observation is not null"
            )
        else:
            assert context is None, (
                "context of appending observation must be null "
                "if context of prior observation is null"
            )
        if self._metadata is not None:
            assert metadata is not None, (
                "metadata of appending observation must not be null "
                "if metadata of prior observation is not null"
            )
        else:
            assert metadata is None, (
                "metadata of appending observation must be null "
                "if metadata of prior observation is null"
            )

        self._configs = pd.concat([self._configs, config]).reset_index(drop=True)
        self._scores = pd.concat([self._scores, score]).reset_index(drop=True)
        assert self._configs.index.equals(
            self._scores.index
        ), "config and score must have the same index"

        if self._contexts is not None:
            self._contexts = pd.concat([self._contexts, context]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._contexts.index
            ), "config and context must have the same index"
        if self._metadata is not None:
            self._metadata = pd.concat([self._metadata, metadata]).reset_index(drop=True)
            assert self._configs.index.equals(
                self._metadata.index
            ), "config and metadata must have the same index"

    def __len__(self) -> int:
        return len(self._configs.index)

    def __iter__(self) -> Iterator["Observation"]:
        """Yields one Observation per stored row, in index order."""
        for idx in self._configs.index:
            yield Observation(
                config=self._configs.loc[idx],
                score=self._scores.loc[idx],
                context=None if self._contexts is None else self._contexts.loc[idx],
                metadata=None if self._metadata is None else self._metadata.loc[idx],
            )

    def __repr__(self) -> str:
        return (
            f"Observations(configs={self._configs}, score={self._scores}, "
            f"contexts={self._contexts}, metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two sets are equal when all four of their dataframes are equal."""
        if not isinstance(other, Observations):
            return False

        if not self._configs.equals(other._configs):
            return False
        if not self._scores.equals(other._scores):
            return False
        if not compare_optional_dataframe(self._contexts, other._contexts):
            return False
        if not compare_optional_dataframe(self._metadata, other._metadata):
            return False

        return True

    # required as per: https://stackoverflow.com/questions/30643236/does-ne-use-an-overridden-eq
    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

316

317

class Suggestion:
    """
    A single suggestion for a configuration.

    A Suggestion is an Observation that has not yet been scored. Evaluating the
    Suggestion and calling `complete(scores)` can convert it to an Observation.
    """

    def __init__(
        self,
        *,
        config: pd.Series,
        context: Optional[pd.Series] = None,
        metadata: Optional[pd.Series] = None,
    ):
        """
        Creates a new Suggestion.

        Parameters
        ----------
        config : pandas.Series
            The configuration suggested.
        context : Optional[pandas.Series]
            The context for this suggestion, by default None
        metadata : Optional[pandas.Series]
            Any metadata provided by the underlying optimizer, by default None
        """
        self._config = config
        self._context = context
        self._metadata = metadata

    @property
    def config(self) -> pd.Series:
        """Gets (a copy of) the config of the Suggestion."""
        return self._config.copy()

    @property
    def context(self) -> Optional[pd.Series]:
        """Gets (a copy of) the context of the Suggestion."""
        return self._context.copy() if self._context is not None else None

    @property
    def metadata(self) -> Optional[pd.Series]:
        """Gets (a copy of) the metadata of the Suggestion."""
        return self._metadata.copy() if self._metadata is not None else None

    def complete(self, score: pd.Series) -> "Observation":
        """
        Completes the Suggestion by adding a score to turn it into an Observation.

        Parameters
        ----------
        score : pandas.Series
            The score metrics observed.

        Returns
        -------
        Observation
            The observation of the suggestion, carrying copies of this
            suggestion's config, context and metadata.
        """
        return Observation(
            config=self.config,
            score=score,
            context=self.context,
            metadata=self.metadata,
        )

    def to_configspace_config(self, space: "ConfigurationSpace") -> "Configuration":
        """
        Convert this suggestion's config to a ConfigSpace Configuration.

        Entries of the config that are NaN/missing are dropped before the values
        are handed to ConfigSpace.

        Parameters
        ----------
        space : ConfigSpace.ConfigurationSpace
            The ConfigurationSpace the resulting Configuration belongs to.

        Returns
        -------
        ConfigSpace.Configuration
            The output Configuration.
        """
        # NOTE(review): the dropped NaN entries are presumably parameters that
        # are inactive in this configuration - confirm against callers.
        return Configuration(space, values=self._config.dropna().to_dict())

    def __repr__(self) -> str:
        return (
            f"Suggestion(config={self._config}, context={self._context}, "
            f"metadata={self._metadata})"
        )

    def __eq__(self, other: Any) -> bool:
        """Two suggestions are equal when config, context and metadata are equal."""
        if not isinstance(other, Suggestion):
            return False
        # Short-circuits in the same order as the field-by-field checks.
        return bool(
            self._config.equals(other._config)
            and compare_optional_series(self._context, other._context)
            and compare_optional_series(self._metadata, other._metadata)
        )

    def __ne__(self, other: Any) -> bool:
        return not self.__eq__(other)

Coverage for mlos_core/mlos_core/data_classes.py: 93%

148 statements