Coverage for mlos_bench/mlos_bench/services/config_persistence.py: 95%
171 statements
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-01 00:52 +0000
« prev ^ index » next coverage.py v7.8.0, created at 2025-04-01 00:52 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
Helper functions to load, instantiate, and serialize Python objects that encapsulate a
benchmark :py:class:`.Environment`, :py:mod:`~mlos_bench.tunables`, :py:class:`.Service`
functions, etc., from JSON configuration files and strings.
10See Also
11--------
12mlos_bench.config : Overview of the configuration system.
13"""
15import logging
16import os
17from collections.abc import Callable, Iterable
18from importlib.resources import files
19from typing import TYPE_CHECKING, Any
21import json5 # To read configs with comments and other JSON5 syntax features
22from jsonschema import SchemaError, ValidationError
24from mlos_bench.config.schemas.config_schemas import ConfigSchema
25from mlos_bench.environments.base_environment import Environment
26from mlos_bench.optimizers.base_optimizer import Optimizer
27from mlos_bench.services.base_service import Service
28from mlos_bench.services.types.config_loader_type import SupportsConfigLoading
29from mlos_bench.tunables.tunable_groups import TunableGroups
30from mlos_bench.tunables.tunable_types import TunableValue
31from mlos_bench.util import (
32 instantiate_from_config,
33 merge_parameters,
34 path_join,
35 preprocess_dynamic_configs,
36)
38if TYPE_CHECKING:
39 from mlos_bench.schedulers.base_scheduler import Scheduler
40 from mlos_bench.schedulers.trial_runner import TrialRunner
41 from mlos_bench.storage.base_storage import Storage
# Module-level logger named after this module; the methods of
# ConfigPersistenceService report their progress and errors through it.
_LOG = logging.getLogger(__name__)
class ConfigPersistenceService(Service, SupportsConfigLoading):
    """Collection of methods to deserialize the Environment, Service, and TunableGroups
    objects.

    The service keeps an ordered list of directories to search for config files
    and provides factory methods that turn loaded JSON configurations into
    Optimizer, Storage, Scheduler, Environment, Service, and TunableGroups
    instances.
    """

    # The path is computed once, at class creation time. Backslashes are replaced
    # with forward slashes so the string uses "/" separators on every platform.
    BUILTIN_CONFIG_PATH = str(files("mlos_bench.config").joinpath("")).replace("\\", "/")
    """A calculated path to the built-in configuration files shipped with the mlos_bench
    package.
    """
57 def __init__(
58 self,
59 config: dict[str, Any] | None = None,
60 global_config: dict[str, Any] | None = None,
61 parent: Service | None = None,
62 methods: dict[str, Callable] | list[Callable] | None = None,
63 ):
64 """
65 Create a new instance of config persistence service.
67 Parameters
68 ----------
69 config : dict
70 Free-format dictionary that contains parameters for the service.
71 (E.g., root path for config files, etc.)
72 global_config : dict
73 Free-format dictionary of global parameters.
74 parent : Service
75 An optional parent service that can provide mixin functions.
76 methods : dict[str, Callable] | list[Callable] | None
77 New methods to register with the service.
78 """
79 super().__init__(
80 config,
81 global_config,
82 parent,
83 self.merge_methods(
84 methods,
85 [
86 self.get_config_paths,
87 self.resolve_path,
88 self.load_config,
89 self.prepare_class_load,
90 self.build_service,
91 self.build_environment,
92 self.load_services,
93 self.load_environment,
94 self.load_environment_list,
95 ],
96 ),
97 )
98 self._config_loader_service = self
100 # Normalize and deduplicate config paths, but maintain order.
101 self._config_path: list[str] = []
102 for path in self.config.get("config_path", []):
103 if path not in self._config_path:
104 self._config_path.append(path_join(path, abs_path=True))
105 # Prepend the cwd if not already on the list.
106 cwd = path_join(os.getcwd(), abs_path=True)
107 if cwd not in self._config_path:
108 self._config_path.insert(0, cwd)
109 # Append the built-in config path if not already on the list.
110 if self.BUILTIN_CONFIG_PATH not in self._config_path:
111 self._config_path.append(self.BUILTIN_CONFIG_PATH)
113 @property
114 def config_paths(self) -> list[str]:
115 """
116 Gets the list of config paths this service will search for config files.
118 Returns
119 -------
120 list[str]
121 """
122 return list(self._config_path) # make a copy to avoid modifications
124 def get_config_paths(self) -> list[str]:
125 """
126 Gets the list of config paths this service will search for config files.
128 Returns
129 -------
130 list[str]
131 """
132 return self.config_paths
134 def resolve_path(self, file_path: str, extra_paths: Iterable[str] | None = None) -> str:
135 """
136 Resolves and prepends the suitable :py:attr:`.config_paths` to ``file_path`` if
137 the latter is not absolute. If :py:attr:`.config_paths` is ``None`` or
138 ``file_path`` is absolute, return ``file_path`` as is.
140 Parameters
141 ----------
142 file_path : str
143 Path to the input config file.
144 extra_paths : Iterable[str]
145 Additional directories to prepend to the list of
146 :py:attr:`.config_paths` search paths.
148 Returns
149 -------
150 path : str
151 An actual path to the config or script.
152 """
153 path_list = list(extra_paths or []) + self._config_path
154 _LOG.debug("Resolve path: %s in: %s", file_path, path_list)
155 if os.path.isabs(file_path):
156 _LOG.debug("Path is absolute: %s", file_path)
157 return file_path
158 for path in path_list:
159 full_path = path_join(path, file_path, abs_path=True)
160 if os.path.exists(full_path):
161 _LOG.debug("Path resolved: %s", full_path)
162 return full_path
163 _LOG.debug("Path not resolved: %s", file_path)
164 return file_path
166 def load_config(
167 self,
168 json: str,
169 schema_type: ConfigSchema | None,
170 ) -> dict[str, Any]:
171 """
172 Load JSON config file or JSON string. Search for a file relative to
173 :py:attr:`.config_paths` if the input path is not absolute. This method is
174 exported to be used as a :py:class:`.SupportsConfigLoading` type
175 :py:class:`.Service`.
177 Parameters
178 ----------
179 json : str
180 Path to the input config file or a JSON string.
181 schema_type : ConfigSchema | None
182 The schema type to validate the config against.
184 Returns
185 -------
186 config : dict | list[dict]
187 Free-format dictionary that contains the configuration.
188 """
189 assert isinstance(json, str)
190 if any(c in json for c in ("{", "[")):
191 # If the path contains braces, it is likely already a json string,
192 # so just parse it.
193 _LOG.info("Load config from json string: %s", json)
194 try:
195 config: Any = json5.loads(json)
196 except ValueError as ex:
197 _LOG.error("Failed to parse config from JSON string: %s", json)
198 raise ValueError(f"Failed to parse config from JSON string: {json}") from ex
199 else:
200 json = self.resolve_path(json)
201 _LOG.info("Load config file: %s", json)
202 with open(json, encoding="utf-8") as fh_json:
203 config = json5.load(fh_json)
204 if schema_type is not None:
205 try:
206 schema_type.validate(config)
207 except (ValidationError, SchemaError) as ex:
208 _LOG.error(
209 "Failed to validate config %s against schema type %s at %s",
210 json,
211 schema_type.name,
212 schema_type.value,
213 )
214 raise ValueError(
215 f"Failed to validate config {json} against "
216 f"schema type {schema_type.name} at {schema_type.value}"
217 ) from ex
218 if isinstance(config, dict) and config.get("$schema"):
219 # Remove $schema attributes from the config after we've validated
220 # them to avoid passing them on to other objects
221 # (e.g. SqlAlchemy based storage initializers).
222 # NOTE: we only do this for internal schemas.
223 # Other configs that get loaded may need the schema field
224 # (e.g. Azure ARM templates).
225 del config["$schema"]
226 else:
227 _LOG.warning("Config %s is not validated against a schema.", json)
228 return config # type: ignore[no-any-return]
230 def prepare_class_load(
231 self,
232 config: dict[str, Any],
233 global_config: dict[str, Any] | None = None,
234 parent_args: dict[str, TunableValue] | None = None,
235 ) -> tuple[str, dict[str, Any]]:
236 """
237 Extract the class instantiation parameters from the configuration. Mix-in the
238 global parameters and resolve the local file system paths, where it is required.
240 Parameters
241 ----------
242 config : dict
243 Configuration of the optimizer.
244 global_config : dict
245 Global configuration parameters (optional).
246 parent_args : dict[str, TunableValue]
247 An optional reference of the parent CompositeEnv's const_args used to
248 expand dynamic config parameters from.
250 Returns
251 -------
252 (class_name, class_config) : (str, dict)
253 Name of the class to instantiate and its configuration.
254 """
255 class_name = config["class"]
256 class_config = config.setdefault("config", {})
258 # Replace any appearance of "$param_name" in the const_arg values with
259 # the value from the parent CompositeEnv.
260 # Note: we could consider expanding this feature to additional config
261 # sections in the future, but for now only use it in const_args.
262 if class_name.startswith("mlos_bench.environments."):
263 const_args = class_config.get("const_args", {})
264 preprocess_dynamic_configs(dest=const_args, source=parent_args)
266 merge_parameters(dest=class_config, source=global_config)
268 for key in set(class_config).intersection(config.get("resolve_config_property_paths", [])):
269 if isinstance(class_config[key], str):
270 class_config[key] = self.resolve_path(class_config[key])
271 elif isinstance(class_config[key], (list, tuple)):
272 class_config[key] = [self.resolve_path(path) for path in class_config[key]]
273 else:
274 raise ValueError(f"Parameter {key} must be a string or a list")
276 if _LOG.isEnabledFor(logging.DEBUG):
277 _LOG.debug(
278 "Instantiating: %s with config:\n%s",
279 class_name,
280 json5.dumps(class_config, indent=2),
281 )
283 return (class_name, class_config)
285 def build_optimizer(
286 self,
287 *,
288 tunables: TunableGroups,
289 service: Service,
290 config: dict[str, Any],
291 global_config: dict[str, Any] | None = None,
292 ) -> Optimizer:
293 """
294 Instantiation of :py:mod:`mlos_bench` :py:class:`.Optimizer` that depend on
295 :py:class:`.Service` and :py:class:`.TunableGroups`.
297 Parameters
298 ----------
299 tunables : TunableGroups
300 Tunable parameters of the environment. We need them to validate the
301 configurations of merged-in experiments and restored/pending trials.
302 service: Service
303 An optional service object (e.g., providing methods to load config files, etc.)
304 config : dict
305 Configuration of the class to instantiate, as loaded from JSON.
306 global_config : dict
307 Global configuration parameters (optional).
309 Returns
310 -------
311 inst : Optimizer
312 A new instance of the `Optimizer` class.
313 """
314 tunables_path = config.get("include_tunables")
315 if tunables_path is not None:
316 tunables = self.load_tunables(tunables_path, tunables)
317 (class_name, class_config) = self.prepare_class_load(config, global_config)
318 inst = instantiate_from_config(
319 Optimizer, # type: ignore[type-abstract]
320 class_name,
321 tunables=tunables,
322 config=class_config,
323 global_config=global_config,
324 service=service,
325 )
326 _LOG.info("Created: Optimizer %s", inst)
327 return inst
329 def build_storage(
330 self,
331 *,
332 service: Service,
333 config: dict[str, Any],
334 global_config: dict[str, Any] | None = None,
335 ) -> "Storage":
336 """
337 Instantiation of mlos_bench :py:class:`.Storage` objects.
339 Parameters
340 ----------
341 service: Service
342 An optional service object (e.g., providing methods to load config files, etc.)
343 config : dict
344 Configuration of the class to instantiate, as loaded from JSON.
345 global_config : dict
346 Global configuration parameters (optional).
348 Returns
349 -------
350 inst : Storage
351 A new instance of the Storage class.
352 """
353 (class_name, class_config) = self.prepare_class_load(config, global_config)
354 # pylint: disable=import-outside-toplevel
355 from mlos_bench.storage.base_storage import Storage
357 inst = instantiate_from_config(
358 Storage, # type: ignore[type-abstract]
359 class_name,
360 config=class_config,
361 global_config=global_config,
362 service=service,
363 )
364 _LOG.info("Created: Storage %s", inst)
365 return inst
367 def build_scheduler( # pylint: disable=too-many-arguments
368 self,
369 *,
370 config: dict[str, Any],
371 global_config: dict[str, Any],
372 trial_runners: list["TrialRunner"],
373 optimizer: Optimizer,
374 storage: "Storage",
375 root_env_config: str,
376 ) -> "Scheduler":
377 """
378 Instantiation of mlos_bench :py:class:`.Scheduler`.
380 Parameters
381 ----------
382 config : dict
383 Configuration of the class to instantiate, as loaded from JSON.
384 global_config : dict
385 Global configuration parameters.
386 trial_runners : List[TrialRunner]
387 The TrialRunners (Environments) to use.
388 optimizer : Optimizer
389 The optimizer to use.
390 storage : Storage
391 The storage to use.
392 root_env_config : str
393 Path to the root environment configuration.
395 Returns
396 -------
397 inst : Scheduler
398 A new instance of the Scheduler.
399 """
400 (class_name, class_config) = self.prepare_class_load(config, global_config)
401 # pylint: disable=import-outside-toplevel
402 from mlos_bench.schedulers.base_scheduler import Scheduler
404 inst = instantiate_from_config(
405 Scheduler, # type: ignore[type-abstract]
406 class_name,
407 config=class_config,
408 global_config=global_config,
409 trial_runners=trial_runners,
410 optimizer=optimizer,
411 storage=storage,
412 root_env_config=root_env_config,
413 )
414 _LOG.info("Created: Scheduler %s", inst)
415 return inst
417 def build_environment(
418 self,
419 config: dict[str, Any],
420 tunables: TunableGroups,
421 global_config: dict[str, Any] | None = None,
422 parent_args: dict[str, TunableValue] | None = None,
423 service: Service | None = None,
424 ) -> Environment:
425 # pylint: disable=too-many-arguments,too-many-positional-arguments
426 """
427 Factory method for a new :py:class:`.Environment` with a given config.
429 Parameters
430 ----------
431 config : dict
432 A dictionary with three mandatory fields:
433 "name": Human-readable string describing the environment;
434 "class": FQN of a Python class to instantiate;
435 "config": Free-format dictionary to pass to the constructor.
436 tunables : TunableGroups
437 A (possibly empty) collection of groups of tunable parameters for
438 all environments.
439 global_config : dict
440 Global parameters to add to the environment config.
441 parent_args : dict[str, TunableValue]
442 An optional reference of the parent CompositeEnv's const_args used to
443 expand dynamic config parameters from.
444 service: Service
445 An optional service object (e.g., providing methods to
446 deploy or reboot a VM, etc.).
448 Returns
449 -------
450 env : Environment
451 An instance of the ``Environment`` class initialized with ``config``.
452 """
453 env_name = config["name"]
454 (env_class, env_config) = self.prepare_class_load(config, global_config, parent_args)
456 env_services_path = config.get("include_services")
457 if env_services_path is not None:
458 service = self.load_services(env_services_path, global_config, service)
460 if service is None:
461 service = Service(parent=self)
463 env_tunables_path = config.get("include_tunables")
464 if env_tunables_path is not None:
465 tunables = self.load_tunables(env_tunables_path, tunables)
467 _LOG.debug("Creating env: %s :: %s", env_name, env_class)
468 env = Environment.new(
469 env_name=env_name,
470 class_name=env_class,
471 config=env_config,
472 global_config=global_config,
473 tunables=tunables,
474 service=service,
475 )
477 _LOG.info("Created env: %s :: %s", env_name, env)
478 return env
480 def _build_standalone_service(
481 self,
482 config: dict[str, Any],
483 global_config: dict[str, Any] | None = None,
484 parent: Service | None = None,
485 ) -> Service:
486 """
487 Factory method for a new service with a given config.
489 Parameters
490 ----------
491 config : dict
492 A dictionary with two mandatory fields:
493 "class": FQN of a Python class to instantiate;
494 "config": Free-format dictionary to pass to the constructor.
495 global_config : dict
496 Global parameters to add to the service config.
497 parent: Service
498 An optional reference of the parent service to mix in.
500 Returns
501 -------
502 svc : Service
503 An instance of the `Service` class initialized with `config`.
504 """
505 (svc_class, svc_config) = self.prepare_class_load(config, global_config)
506 service = Service.new(svc_class, svc_config, global_config, parent)
507 _LOG.info("Created service: %s", service)
508 return service
510 def _build_composite_service(
511 self,
512 config_list: Iterable[dict[str, Any]],
513 global_config: dict[str, Any] | None = None,
514 parent: Service | None = None,
515 ) -> Service:
516 """
517 Factory method for a new service with a given config.
519 Parameters
520 ----------
521 config_list : a list of dict
522 A list where each element is a dictionary with 2 mandatory fields:
523 "class": FQN of a Python class to instantiate;
524 "config": Free-format dictionary to pass to the constructor.
525 global_config : dict
526 Global parameters to add to the service config.
527 parent: Service
528 An optional reference of the parent service to mix in.
530 Returns
531 -------
532 svc : Service
533 An instance of the `Service` class that is a combination of all
534 services from the list plus the parent mix-in.
535 """
536 service = Service()
537 if parent:
538 service.register(parent.export())
540 for config in config_list:
541 service.register(
542 self._build_standalone_service(config, global_config, service).export()
543 )
545 if _LOG.isEnabledFor(logging.DEBUG):
546 _LOG.debug("Created mix-in service: %s", service)
548 return service
550 def build_service(
551 self,
552 config: dict[str, Any],
553 global_config: dict[str, Any] | None = None,
554 parent: Service | None = None,
555 ) -> Service:
556 """
557 Factory method for a new service with a given config.
559 Parameters
560 ----------
561 config : dict
562 A dictionary with 2 mandatory fields:
563 "class": FQN of a Python class to instantiate;
564 "config": Free-format dictionary to pass to the constructor.
565 global_config : dict
566 Global parameters to add to the service config.
567 parent: Service
568 An optional reference of the parent service to mix in.
570 Returns
571 -------
572 svc : Service
573 An instance of the `Service` class that is a combination of all
574 services from the list plus the parent mix-in.
575 """
576 if _LOG.isEnabledFor(logging.DEBUG):
577 _LOG.debug("Build service from config:\n%s", json5.dumps(config, indent=2))
579 assert isinstance(config, dict)
580 config_list: list[dict[str, Any]]
581 if "class" not in config:
582 # Top level config is a simple object with a list of services
583 config_list = config["services"]
584 else:
585 # Top level config is a single service
586 if parent is None:
587 return self._build_standalone_service(config, global_config)
588 config_list = [config]
590 return self._build_composite_service(config_list, global_config, parent)
592 def load_environment(
593 self,
594 json: str,
595 tunables: TunableGroups,
596 global_config: dict[str, Any] | None = None,
597 parent_args: dict[str, TunableValue] | None = None,
598 service: Service | None = None,
599 ) -> Environment:
600 # pylint: disable=too-many-arguments,too-many-positional-arguments
601 """
602 Load and build new :py:class:`.Environment` from the config file or JSON string.
604 Parameters
605 ----------
606 json : str
607 The environment JSON configuration file or JSON string.
608 tunables : TunableGroups
609 A (possibly empty) collection of tunables to add to the environment.
610 global_config : dict
611 Global parameters to add to the environment config.
612 parent_args : dict[str, TunableValue]
613 An optional reference of the parent CompositeEnv's const_args used to
614 expand dynamic config parameters from.
615 service : Service
616 An optional reference of the parent service to mix in.
618 Returns
619 -------
620 env : Environment
621 A new benchmarking environment.
623 See Also
624 --------
625 mlos_bench.environments : Examples of environment configurations.
626 """
627 config = self.load_config(json, ConfigSchema.ENVIRONMENT)
628 assert isinstance(config, dict)
629 return self.build_environment(config, tunables, global_config, parent_args, service)
631 def load_environment_list(
632 self,
633 json: str,
634 tunables: TunableGroups,
635 global_config: dict[str, Any] | None = None,
636 parent_args: dict[str, TunableValue] | None = None,
637 service: Service | None = None,
638 ) -> list[Environment]:
639 # pylint: disable=too-many-arguments,too-many-positional-arguments
640 """
641 Load and build a list of Environments from the config file or JSON string.
643 Parameters
644 ----------
645 json : str
646 The environment JSON configuration file or a JSON string.
647 Can contain either one environment or a list of environments.
648 tunables : TunableGroups
649 An (possibly empty) collection of tunables to add to the environment.
650 global_config : dict
651 Global parameters to add to the environment config.
652 service : Service
653 An optional reference of the parent service to mix in.
654 parent_args : dict[str, TunableValue]
655 An optional reference of the parent CompositeEnv's const_args used to
656 expand dynamic config parameters from.
658 Returns
659 -------
660 env : list[Environment]
661 A list of new benchmarking environments.
663 See Also
664 --------
665 mlos_bench.environments : Examples of environment configurations.
666 """
667 config = self.load_config(json, ConfigSchema.ENVIRONMENT)
668 return [self.build_environment(config, tunables, global_config, parent_args, service)]
670 def load_services(
671 self,
672 jsons: Iterable[str],
673 global_config: dict[str, Any] | None = None,
674 parent: Service | None = None,
675 ) -> Service:
676 """
677 Read the configuration files or JSON strings and bundle all Service methods from
678 those configs into a single Service object.
680 Notes
681 -----
682 Order of the services in the list matters. If multiple Services export the
683 same method, the last one in the list will be used.
685 Parameters
686 ----------
687 jsons : list of str
688 A list of service JSON configuration files or JSON strings.
689 global_config : dict
690 Global parameters to add to the service config.
691 parent : Service
692 An optional reference of the parent service to mix in.
694 Returns
695 -------
696 service : Service
697 A collection of service methods.
699 See Also
700 --------
701 mlos_bench.services : Examples of service configurations.
702 """
703 _LOG.info("Load services: %s parent: %s", jsons, parent.__class__.__name__)
704 service = Service({}, global_config, parent)
705 for json in jsons:
706 config = self.load_config(json, ConfigSchema.SERVICE)
707 service.register(self.build_service(config, global_config, service).export())
708 return service
710 def load_tunables(
711 self,
712 jsons: Iterable[str],
713 parent: TunableGroups | None = None,
714 ) -> TunableGroups:
715 """
716 Load a collection of tunable parameters from JSON files or strings into the
717 parent TunableGroup.
719 This helps allow standalone environment configs to reference
720 overlapping tunable groups configs but still allow combining them into
721 a single instance that each environment can reference.
723 Parameters
724 ----------
725 jsons : list of str
726 A list of JSON files or JSON strings to load.
727 parent : TunableGroups
728 A (possibly empty) collection of tunables to add to the new collection.
730 Returns
731 -------
732 tunables : TunableGroups
733 The larger collection of tunable parameters.
735 See Also
736 --------
737 mlos_bench.tunables : Examples of tunable parameter configurations.
738 """
739 _LOG.info("Load tunables: '%s'", jsons)
740 if parent is None:
741 parent = TunableGroups()
742 tunables = parent.copy()
743 for json in jsons:
744 config = self.load_config(json, ConfigSchema.TUNABLE_PARAMS)
745 assert isinstance(config, dict)
746 tunables.merge(TunableGroups(config))
747 return tunables