Coverage for mlos_bench/mlos_bench/services/config_persistence.py: 95%
171 statements
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-14 00:55 +0000
« prev ^ index » next coverage.py v7.9.2, created at 2025-07-14 00:55 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6Helper functions to load, instantiate, and serialize Python objects that encapsulate a
7benchmark :py:class:`.Environment`, :py:mod:`~mlos_bench.tunables`, :py:class:`.Service`
8functions, etc from JSON configuration files and strings.
10See Also
11--------
12mlos_bench.config : Overview of the configuration system.
13"""
15import logging
16import os
17from collections.abc import Callable, Iterable
18from importlib.resources import files
19from typing import TYPE_CHECKING, Any
21import json5 # To read configs with comments and other JSON5 syntax features
22from jsonschema import SchemaError, ValidationError
24from mlos_bench.config.schemas.config_schemas import ConfigSchema
25from mlos_bench.environments.base_environment import Environment
26from mlos_bench.optimizers.base_optimizer import Optimizer
27from mlos_bench.services.base_service import Service
28from mlos_bench.services.types.config_loader_type import SupportsConfigLoading
29from mlos_bench.tunables.tunable_groups import TunableGroups
30from mlos_bench.tunables.tunable_types import TunableValue
31from mlos_bench.util import (
32 instantiate_from_config,
33 merge_parameters,
34 path_join,
35 preprocess_dynamic_configs,
36 sanitize_config,
37)
39if TYPE_CHECKING:
40 from mlos_bench.schedulers.base_scheduler import Scheduler
41 from mlos_bench.schedulers.trial_runner import TrialRunner
42 from mlos_bench.storage.base_storage import Storage
45_LOG = logging.getLogger(__name__)
48class ConfigPersistenceService(Service, SupportsConfigLoading):
49 """Collection of methods to deserialize the Environment, Service, and TunableGroups
50 objects.
51 """
53 BUILTIN_CONFIG_PATH = str(files("mlos_bench.config").joinpath("")).replace("\\", "/")
54 """A calculated path to the built-in configuration files shipped with the mlos_bench
55 package.
56 """
def __init__(
    self,
    config: dict[str, Any] | None = None,
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
    methods: dict[str, Callable] | list[Callable] | None = None,
):
    """
    Create a new instance of config persistence service.

    Parameters
    ----------
    config : dict
        Free-format dictionary that contains parameters for the service.
        (E.g., root path for config files, etc.)
        The optional ``"config_path"`` entry lists extra directories to search.
    global_config : dict
        Free-format dictionary of global parameters.
    parent : Service
        An optional parent service that can provide mixin functions.
    methods : dict[str, Callable] | list[Callable] | None
        New methods to register with the service.
    """
    super().__init__(
        config,
        global_config,
        parent,
        self.merge_methods(
            methods,
            [
                self.get_config_paths,
                self.resolve_path,
                self.load_config,
                self.prepare_class_load,
                self.build_service,
                self.build_environment,
                self.load_services,
                self.load_environment,
                self.load_environment_list,
            ],
        ),
    )
    # This service is its own config loader.
    self._config_loader_service = self

    # Normalize and deduplicate config paths, but maintain order.
    # The path is normalized *before* the membership test: the list only ever
    # stores normalized paths, so testing the raw value would let two different
    # spellings of the same directory slip in as separate entries.
    self._config_path: list[str] = []
    for path in self.config.get("config_path", []):
        normalized = path_join(path, abs_path=True)
        if normalized not in self._config_path:
            self._config_path.append(normalized)

    # Prepend the cwd if not already on the list.
    cwd = path_join(os.getcwd(), abs_path=True)
    if cwd not in self._config_path:
        self._config_path.insert(0, cwd)

    # Append the built-in config path if not already on the list.
    if self.BUILTIN_CONFIG_PATH not in self._config_path:
        self._config_path.append(self.BUILTIN_CONFIG_PATH)
@property
def config_paths(self) -> list[str]:
    """
    Directories this service searches when looking for config files.

    Returns
    -------
    list[str]
        A fresh list, so callers cannot alter the service's own search path.
    """
    return self._config_path[:]
def get_config_paths(self) -> list[str]:
    """
    Method form of :py:attr:`.config_paths`; it is one of the methods this
    service registers in its constructor.

    Returns
    -------
    list[str]
        The list of config paths this service will search for config files.
    """
    search_paths = self.config_paths
    return search_paths
def resolve_path(self, file_path: str, extra_paths: Iterable[str] | None = None) -> str:
    """
    Locate ``file_path`` under one of the search directories.

    An absolute ``file_path`` is returned untouched. A relative one is joined
    with each search directory in turn, ``extra_paths`` first and then the
    service's own config paths, and the first candidate that exists on disk
    wins. If nothing matches, ``file_path`` is returned as given.

    Parameters
    ----------
    file_path : str
        Path to the input config file.
    extra_paths : Iterable[str]
        Additional directories to search before :py:attr:`.config_paths`.

    Returns
    -------
    path : str
        An actual path to the config or script.
    """
    search_dirs = [*(extra_paths or []), *self._config_path]
    _LOG.debug("Resolve path: %s in: %s", file_path, search_dirs)

    if os.path.isabs(file_path):
        _LOG.debug("Path is absolute: %s", file_path)
        return file_path

    # Lazily build candidates so that we stop joining paths at the first hit.
    candidates = (path_join(base_dir, file_path, abs_path=True) for base_dir in search_dirs)
    resolved = next((cand for cand in candidates if os.path.exists(cand)), None)

    if resolved is None:
        _LOG.debug("Path not resolved: %s", file_path)
        return file_path

    _LOG.debug("Path resolved: %s", resolved)
    return resolved
def load_config(
    self,
    json: str,
    schema_type: ConfigSchema | None,
) -> dict[str, Any]:
    """
    Parse a JSON5 config given either inline or as a file path.

    Input containing ``{`` or ``[`` is treated as a JSON string and parsed
    directly; anything else is treated as a file name and looked up with
    :py:meth:`.resolve_path`. This method is exported to be used as a
    :py:class:`.SupportsConfigLoading` type :py:class:`.Service`.

    Parameters
    ----------
    json : str
        Path to the input config file or a JSON string.
    schema_type : ConfigSchema | None
        The schema type to validate the config against.
        When ``None``, validation is skipped and a warning is logged.

    Returns
    -------
    config : dict | list[dict]
        Free-format dictionary that contains the configuration.

    Raises
    ------
    ValueError
        If an inline JSON string cannot be parsed, or if the config fails
        validation against ``schema_type``.
    """
    assert isinstance(json, str)

    is_inline_json = "{" in json or "[" in json
    if is_inline_json:
        # Braces/brackets mean this is most likely JSON text rather than a path.
        _LOG.info("Load config from json string: %s", json)
        try:
            config: Any = json5.loads(json)
        except ValueError as ex:
            _LOG.error("Failed to parse config from JSON string: %s", json)
            raise ValueError(f"Failed to parse config from JSON string: {json}") from ex
    else:
        json = self.resolve_path(json)
        _LOG.info("Load config file: %s", json)
        with open(json, encoding="utf-8") as fh_json:
            config = json5.load(fh_json)

    if schema_type is None:
        _LOG.warning("Config %s is not validated against a schema.", json)
        return config  # type: ignore[no-any-return]

    try:
        schema_type.validate(config)
    except (ValidationError, SchemaError) as ex:
        _LOG.error(
            "Failed to validate config %s against schema type %s at %s",
            json,
            schema_type.name,
            schema_type.value,
        )
        raise ValueError(
            f"Failed to validate config {json} against "
            f"schema type {schema_type.name} at {schema_type.value}"
        ) from ex

    # Remove $schema attributes from the config after we've validated
    # them to avoid passing them on to other objects
    # (e.g. SqlAlchemy based storage initializers).
    # NOTE: we only do this for internal schemas.
    # Other configs that get loaded may need the schema field
    # (e.g. Azure ARM templates).
    if isinstance(config, dict) and config.get("$schema"):
        del config["$schema"]

    return config  # type: ignore[no-any-return]
def prepare_class_load(
    self,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
) -> tuple[str, dict[str, Any]]:
    """
    Pull the class name and constructor config out of a loaded config.

    Along the way the global parameters are merged in and every property
    named in ``resolve_config_property_paths`` is resolved to a file system
    path. Note that ``config`` gets a ``"config"`` entry added if it had none,
    and that entry is updated in place.

    Parameters
    ----------
    config : dict
        Configuration of the optimizer.
    global_config : dict
        Global configuration parameters (optional).
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.

    Returns
    -------
    (class_name, class_config) : (str, dict)
        Name of the class to instantiate and its configuration.

    Raises
    ------
    ValueError
        If a property listed for path resolution is neither a string nor a
        list/tuple.
    """
    class_name = config["class"]
    class_config = config.setdefault("config", {})

    # Replace any appearance of "$param_name" in the const_arg values with
    # the value from the parent CompositeEnv.
    # Note: we could consider expanding this feature to additional config
    # sections in the future, but for now only use it in const_args.
    if class_name.startswith("mlos_bench.environments."):
        preprocess_dynamic_configs(
            dest=class_config.get("const_args", {}),
            source=parent_args,
        )

    merge_parameters(dest=class_config, source=global_config)

    # Only properties that are both present and flagged for resolution.
    flagged = config.get("resolve_config_property_paths", [])
    for key in set(class_config).intersection(flagged):
        value = class_config[key]
        if isinstance(value, str):
            class_config[key] = self.resolve_path(value)
        elif isinstance(value, (list, tuple)):
            class_config[key] = [self.resolve_path(item) for item in value]
        else:
            raise ValueError(f"Parameter {key} must be a string or a list")

    # Guarded so the config is not sanitized/serialized unless it will be logged.
    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug(
            "Instantiating: %s with config:\n%s",
            class_name,
            json5.dumps(sanitize_config(class_config), indent=2),
        )

    return (class_name, class_config)
def build_optimizer(
    self,
    *,
    tunables: TunableGroups,
    service: Service,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
) -> Optimizer:
    """
    Create an :py:class:`.Optimizer` from its config, a :py:class:`.Service`,
    and a :py:class:`.TunableGroups` collection.

    Parameters
    ----------
    tunables : TunableGroups
        Tunable parameters of the environment. We need them to validate the
        configurations of merged-in experiments and restored/pending trials.
        If the config has ``include_tunables``, those are loaded on top.
    service: Service
        An optional service object (e.g., providing methods to load config files, etc.)
    config : dict
        Configuration of the class to instantiate, as loaded from JSON.
    global_config : dict
        Global configuration parameters (optional).

    Returns
    -------
    inst : Optimizer
        A new instance of the `Optimizer` class.
    """
    if (extra_tunables := config.get("include_tunables")) is not None:
        tunables = self.load_tunables(extra_tunables, tunables)

    opt_class, opt_config = self.prepare_class_load(config, global_config)
    optimizer = instantiate_from_config(
        Optimizer,  # type: ignore[type-abstract]
        opt_class,
        tunables=tunables,
        config=opt_config,
        global_config=global_config,
        service=service,
    )
    _LOG.info("Created: Optimizer %s", optimizer)
    return optimizer
def build_storage(
    self,
    *,
    service: Service,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
) -> "Storage":
    """
    Create an mlos_bench :py:class:`.Storage` object from its config.

    Parameters
    ----------
    service: Service
        An optional service object (e.g., providing methods to load config files, etc.)
    config : dict
        Configuration of the class to instantiate, as loaded from JSON.
    global_config : dict
        Global configuration parameters (optional).

    Returns
    -------
    inst : Storage
        A new instance of the Storage class.
    """
    storage_class, storage_config = self.prepare_class_load(config, global_config)

    # Storage is only imported under TYPE_CHECKING at module level,
    # so it has to be imported here for runtime use.
    # pylint: disable=import-outside-toplevel
    from mlos_bench.storage.base_storage import Storage

    storage = instantiate_from_config(
        Storage,  # type: ignore[type-abstract]
        storage_class,
        config=storage_config,
        global_config=global_config,
        service=service,
    )
    _LOG.info("Created: Storage %s", storage)
    return storage
def build_scheduler(  # pylint: disable=too-many-arguments
    self,
    *,
    config: dict[str, Any],
    global_config: dict[str, Any],
    trial_runners: list["TrialRunner"],
    optimizer: Optimizer,
    storage: "Storage",
    root_env_config: str,
) -> "Scheduler":
    """
    Create an mlos_bench :py:class:`.Scheduler` from its config and collaborators.

    Parameters
    ----------
    config : dict
        Configuration of the class to instantiate, as loaded from JSON.
    global_config : dict
        Global configuration parameters.
    trial_runners : List[TrialRunner]
        The TrialRunners (Environments) to use.
    optimizer : Optimizer
        The optimizer to use.
    storage : Storage
        The storage to use.
    root_env_config : str
        Path to the root environment configuration.

    Returns
    -------
    inst : Scheduler
        A new instance of the Scheduler.
    """
    sched_class, sched_config = self.prepare_class_load(config, global_config)

    # Scheduler is only imported under TYPE_CHECKING at module level,
    # so it has to be imported here for runtime use.
    # pylint: disable=import-outside-toplevel
    from mlos_bench.schedulers.base_scheduler import Scheduler

    scheduler = instantiate_from_config(
        Scheduler,  # type: ignore[type-abstract]
        sched_class,
        config=sched_config,
        global_config=global_config,
        trial_runners=trial_runners,
        optimizer=optimizer,
        storage=storage,
        root_env_config=root_env_config,
    )
    _LOG.info("Created: Scheduler %s", scheduler)
    return scheduler
def build_environment(
    self,
    config: dict[str, Any],
    tunables: TunableGroups,
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
    service: Service | None = None,
) -> Environment:
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """
    Build a new :py:class:`.Environment` from an already-loaded config.

    Parameters
    ----------
    config : dict
        A dictionary with three mandatory fields:
            "name": Human-readable string describing the environment;
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor.
        Optional "include_services" and "include_tunables" entries list
        further configs to load and mix in.
    tunables : TunableGroups
        A (possibly empty) collection of groups of tunable parameters for
        all environments.
    global_config : dict
        Global parameters to add to the environment config.
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.
    service: Service
        An optional service object (e.g., providing methods to
        deploy or reboot a VM, etc.).

    Returns
    -------
    env : Environment
        An instance of the ``Environment`` class initialized with ``config``.
    """
    env_name = config["name"]
    env_class, env_config = self.prepare_class_load(config, global_config, parent_args)

    # Services named by the environment are layered over the given one.
    if (services_to_include := config.get("include_services")) is not None:
        service = self.load_services(services_to_include, global_config, service)

    # Fall back to a plain Service whose parent is this config loader.
    if service is None:
        service = Service(parent=self)

    if (tunables_to_include := config.get("include_tunables")) is not None:
        tunables = self.load_tunables(tunables_to_include, tunables)

    _LOG.debug("Creating env: %s :: %s", env_name, env_class)
    new_env = Environment.new(
        env_name=env_name,
        class_name=env_class,
        config=env_config,
        global_config=global_config,
        tunables=tunables,
        service=service,
    )

    _LOG.info("Created env: %s :: %s", env_name, new_env)
    return new_env
def _build_standalone_service(
    self,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Instantiate a single service described by ``config``.

    Parameters
    ----------
    config : dict
        A dictionary with two mandatory fields:
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor.
    global_config : dict
        Global parameters to add to the service config.
    parent: Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    svc : Service
        An instance of the `Service` class initialized with `config`.
    """
    svc_class, svc_config = self.prepare_class_load(config, global_config)
    new_service = Service.new(svc_class, svc_config, global_config, parent)
    _LOG.info("Created service: %s", new_service)
    return new_service
def _build_composite_service(
    self,
    config_list: Iterable[dict[str, Any]],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Combine several service configs (and an optional parent) into one service.

    Parameters
    ----------
    config_list : a list of dict
        A list where each element is a dictionary with 2 mandatory fields:
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor.
    global_config : dict
        Global parameters to add to the service config.
    parent: Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    svc : Service
        An instance of the `Service` class that is a combination of all
        services from the list plus the parent mix-in.
    """
    composite = Service()
    if parent:
        composite.register(parent.export())

    # Each service is built with the composite-so-far as its parent, and its
    # exported methods are registered on the composite before the next one.
    for svc_config in config_list:
        member = self._build_standalone_service(svc_config, global_config, composite)
        composite.register(member.export())

    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug("Created mix-in service: %s", composite)

    return composite
def build_service(
    self,
    config: dict[str, Any],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Build a service from a config holding one service or a list of them.

    Parameters
    ----------
    config : dict
        Either a single service description with 2 mandatory fields:
            "class": FQN of a Python class to instantiate;
            "config": Free-format dictionary to pass to the constructor.
        or, when there is no "class" key, an object whose "services" entry
        is a list of such descriptions.
    global_config : dict
        Global parameters to add to the service config.
    parent: Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    svc : Service
        An instance of the `Service` class that is a combination of all
        services from the list plus the parent mix-in.
    """
    if _LOG.isEnabledFor(logging.DEBUG):
        _LOG.debug(
            "Build service from config:\n%s",
            json5.dumps(
                sanitize_config(config),
                indent=2,
            ),
        )

    assert isinstance(config, dict)
    service_configs: list[dict[str, Any]]
    if "class" in config:
        # Top level config is a single service
        if parent is None:
            # Nothing to mix in, so no composite wrapper is needed.
            return self._build_standalone_service(config, global_config)
        service_configs = [config]
    else:
        # Top level config is a simple object with a list of services
        service_configs = config["services"]

    return self._build_composite_service(service_configs, global_config, parent)
def load_environment(
    self,
    json: str,
    tunables: TunableGroups,
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
    service: Service | None = None,
) -> Environment:
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """
    Read an environment config (file or JSON string), validate it against
    the environment schema, and build the :py:class:`.Environment`.

    Parameters
    ----------
    json : str
        The environment JSON configuration file or JSON string.
    tunables : TunableGroups
        A (possibly empty) collection of tunables to add to the environment.
    global_config : dict
        Global parameters to add to the environment config.
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.
    service : Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    env : Environment
        A new benchmarking environment.

    See Also
    --------
    mlos_bench.environments : Examples of environment configurations.
    """
    env_config = self.load_config(json, ConfigSchema.ENVIRONMENT)
    assert isinstance(env_config, dict)
    return self.build_environment(
        env_config,
        tunables,
        global_config,
        parent_args,
        service,
    )
def load_environment_list(
    self,
    json: str,
    tunables: TunableGroups,
    global_config: dict[str, Any] | None = None,
    parent_args: dict[str, TunableValue] | None = None,
    service: Service | None = None,
) -> list[Environment]:
    # pylint: disable=too-many-arguments,too-many-positional-arguments
    """
    Load and build a list of Environments from the config file or JSON string.

    Parameters
    ----------
    json : str
        The environment JSON configuration file or a JSON string.
        Can contain either one environment or a list of environments.
    tunables : TunableGroups
        An (possibly empty) collection of tunables to add to the environment.
    global_config : dict
        Global parameters to add to the environment config.
    parent_args : dict[str, TunableValue]
        An optional reference of the parent CompositeEnv's const_args used to
        expand dynamic config parameters from.
    service : Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    env : list[Environment]
        A list of new benchmarking environments, one per environment
        description found in the config, in the same order.

    See Also
    --------
    mlos_bench.environments : Examples of environment configurations.
    """
    config: Any = self.load_config(json, ConfigSchema.ENVIRONMENT)
    # A single environment description is treated as a one-element list.
    # NOTE(review): whether the ENVIRONMENT schema lets a top-level list through
    # validation is not visible from here; if it does not, the list branch is
    # simply never taken and behavior is unchanged.
    env_configs = config if isinstance(config, list) else [config]
    return [
        self.build_environment(env_config, tunables, global_config, parent_args, service)
        for env_config in env_configs
    ]
def load_services(
    self,
    jsons: Iterable[str],
    global_config: dict[str, Any] | None = None,
    parent: Service | None = None,
) -> Service:
    """
    Load every service config in ``jsons`` (files or JSON strings) and gather
    the methods they export into one Service object.

    Notes
    -----
    Order of the services in the list matters. If multiple Services export the
    same method, the last one in the list will be used.

    Parameters
    ----------
    jsons : list of str
        A list of service JSON configuration files or JSON strings.
    global_config : dict
        Global parameters to add to the service config.
    parent : Service
        An optional reference of the parent service to mix in.

    Returns
    -------
    service : Service
        A collection of service methods.

    See Also
    --------
    mlos_bench.services : Examples of service configurations.
    """
    _LOG.info("Load services: %s parent: %s", jsons, parent.__class__.__name__)
    bundle = Service({}, global_config, parent)
    for svc_json in jsons:
        svc_config = self.load_config(svc_json, ConfigSchema.SERVICE)
        # The bundle built so far is passed as the parent of the next service.
        built = self.build_service(svc_config, global_config, bundle)
        bundle.register(built.export())
    return bundle
def load_tunables(
    self,
    jsons: Iterable[str],
    parent: TunableGroups | None = None,
) -> TunableGroups:
    """
    Merge tunable parameters from JSON files or strings into a copy of the
    parent TunableGroups.

    This helps allow standalone environment configs to reference
    overlapping tunable groups configs but still allow combining them into
    a single instance that each environment can reference.

    Parameters
    ----------
    jsons : list of str
        A list of JSON files or JSON strings to load.
    parent : TunableGroups
        A (possibly empty) collection of tunables to add to the new collection.
        It is copied before anything is merged into it.

    Returns
    -------
    tunables : TunableGroups
        The larger collection of tunable parameters.

    See Also
    --------
    mlos_bench.tunables : Examples of tunable parameter configurations.
    """
    _LOG.info("Load tunables: '%s'", jsons)
    base_groups = TunableGroups() if parent is None else parent
    merged = base_groups.copy()
    for tunables_json in jsons:
        group_config = self.load_config(tunables_json, ConfigSchema.TUNABLE_PARAMS)
        assert isinstance(group_config, dict)
        merged.merge(TunableGroups(group_config))
    return merged