Coverage for mlos_bench/mlos_bench/config/schemas/config_schemas.py: 93%
85 statements
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-20 00:44 +0000
« prev ^ index » next coverage.py v7.6.9, created at 2024-12-20 00:44 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6A simple class for describing where to find different `json config schemas
7<https://json-schema.org>`_ and validating configs against them.
9Used by the :py:class:`~mlos_bench.launcher.Launcher` and
10:py:class:`~mlos_bench.services.config_persistence.ConfigPersistenceService` to
11validate configs on load.
13Notes
14-----
15- See `mlos_bench/config/schemas/README.md
16 <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/>`_
17 for additional documentation in the source tree.
19- See `mlos_bench/config/README.md
20 <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/>`_
21 for additional config examples in the source tree.
22"""
24import json # schema files are pure json - no comments
25import logging
26from enum import Enum
27from os import environ, path, walk
28from typing import Dict, Iterator, Mapping
30import jsonschema
31from referencing import Registry, Resource
32from referencing.jsonschema import DRAFT202012
34from mlos_bench.util import path_join
_LOG = logging.getLogger(__name__)

# Absolute path of the directory holding this module; it is also the root that
# gets walked to find all of the config schema files.
CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True)
"""The local directory where all config schemas shipped as a part of the
:py:mod:`mlos_bench` module are stored.
"""

# Escape hatch for tight dev cycles: when the flag below is set, the
# `ConfigSchema.validate()` method further down skips validation entirely.
# NOTE: this may cause pytest to fail if it's expecting exceptions
# to be raised for invalid configs.
VALIDATION_ENV_FLAG = "MLOS_BENCH_SKIP_SCHEMA_VALIDATION"
"""
The special environment flag to set to skip schema validation when "true".

Useful for local development when you're making a lot of changes to the config or adding
new classes that aren't in the main repo yet.
"""

# Read once at import time.  The comparison is case-insensitive and any of the
# listed spellings counts as "true"; an unset variable defaults to "false".
_SKIP_VALIDATION = (
    environ.get(VALIDATION_ENV_FLAG, "false").lower()
    in {"true", "y", "yes", "on", "1"}
)
# Note: we separate out the SchemaStore from a class method on ConfigSchema
# because of issues with mypy/pylint and non-Enum-member class members.
class SchemaStore(Mapping):
    """A read-only mapping of schema key to parsed json schema for the validator to
    reference.

    Schemas are read lazily from ``CONFIG_SCHEMA_DIR`` on first access and cached in
    class-level state, so all instances share the same store.  Each schema is
    registered under two keys: the path of the file it was loaded from and its
    ``$id``.
    """

    # A class member mapping of schema id (or schema file path) to schema object.
    _SCHEMA_STORE: Dict[str, dict] = {}
    # The same schemas wrapped as resources, built on demand by `_load_registry()`.
    _REGISTRY: Registry = Registry()

    def __len__(self) -> int:
        """Gets the number of keys in the store, loading the schemas first if needed."""
        # Load lazily here too, so that len() agrees with __getitem__ even when it
        # is the first thing called on the store.
        self._load_schemas()
        return len(self._SCHEMA_STORE)

    def __iter__(self) -> Iterator:
        """Iterates over the keys in the store, loading the schemas first if needed."""
        # Same lazy load as __getitem__: without it iteration (and therefore
        # keys()/items()/values()) would see an empty store before the first lookup.
        self._load_schemas()
        return iter(self._SCHEMA_STORE)

    def __getitem__(self, key: str) -> dict:
        """
        Gets the schema object for the given key.

        Parameters
        ----------
        key : str
            Either the path of a schema file or the ``$id`` of a schema.

        Returns
        -------
        schema : dict
            The parsed json schema.

        Raises
        ------
        KeyError
            If no schema is registered under that key.
        """
        self._load_schemas()  # no-op once the store is populated
        return self._SCHEMA_STORE[key]

    @classmethod
    def _load_schemas(cls) -> None:
        """Loads all schemas and subschemas into the schema store for the validator to
        reference.

        Does nothing if the store is already populated.
        """
        if cls._SCHEMA_STORE:
            return
        # Stage everything in a local dict first: if reading or checking one of the
        # files fails part way through the walk, the shared store stays empty rather
        # than half populated (which the guard above would mistake for "loaded").
        loaded: Dict[str, dict] = {}
        for root, _, files in walk(CONFIG_SCHEMA_DIR):
            for file_name in files:
                if not file_name.endswith(".json"):
                    continue
                file_path = path_join(root, file_name)
                # Skip empty files - they cannot be parsed as json.
                if path.getsize(file_path) == 0:
                    continue
                with open(file_path, mode="r", encoding="utf-8") as schema_file:
                    schema = json.load(schema_file)
                loaded[file_path] = schema
                # Let the schema be referenced by its id as well.
                assert "$id" in schema, f"Schema file has no '$id': {file_path}"
                schema_id = schema["$id"]
                assert schema_id not in loaded, (
                    f"Duplicate schema '$id' {schema_id} in {file_path}"
                )
                loaded[schema_id] = schema
        # Publish in one step; update in place since the dict is shared class state.
        cls._SCHEMA_STORE.update(loaded)

    @classmethod
    def _load_registry(cls) -> None:
        """Also store them in a Registry object for referencing by recent versions of
        jsonschema.
        """
        cls._load_schemas()  # no-op once the store is populated
        # Every key of the store (file path and `$id` alike) becomes a resource URL.
        cls._REGISTRY = Registry().with_resources(
            [
                (url, Resource.from_contents(schema, default_specification=DRAFT202012))
                for url, schema in cls._SCHEMA_STORE.items()
            ]
        )

    @property
    def registry(self) -> Registry:
        """Returns a Registry object with all the schemas loaded."""
        # NOTE(review): relies on an empty Registry being falsy - confirm against
        # the `referencing` version in use.
        if not self._REGISTRY:
            self._load_registry()
        return self._REGISTRY


SCHEMA_STORE = SchemaStore()
"""Static :py:class:`.SchemaStore` instance used for storing and retrieving schemas for
config validation.
"""
class ConfigSchema(Enum):
    """An enum to help describe schema types and help validate configs against them.

    The value of each member is the path of its schema file under
    ``CONFIG_SCHEMA_DIR``; that path is also the key used to look the schema up in
    ``SCHEMA_STORE``.
    """

    CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/cli/cli-schema.json>`__
    for :py:mod:`mlos_bench <mlos_bench.run>` CLI configuration.

    See Also
    --------
    mlos_bench.config : documentation on the configuration system.
    mlos_bench.launcher.Launcher : class is responsible for processing the CLI args.
    """

    GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/cli/globals-schema.json>`__
    for :py:mod:`global variables <mlos_bench.config>`.
    """

    ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/environments/environment-schema.json>`__
    for :py:mod:`~mlos_bench.environments`.
    """

    OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/optimizers/optimizer-schema.json>`__
    for :py:mod:`~mlos_bench.optimizers`.
    """

    SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/schedulers/scheduler-schema.json>`__
    for :py:mod:`~mlos_bench.schedulers`.
    """

    SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/services/service-schema.json>`__
    for :py:mod:`~mlos_bench.services`.
    """

    STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/storage/storage-schema.json>`__
    for :py:mod:`~mlos_bench.storage` instances.
    """

    TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/tunables/tunable-params-schema.json>`__
    for :py:mod:`~mlos_bench.tunables` instances.
    """

    TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json")
    """
    Json config `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/tunables/tunable-values-schema.json>`__
    for values of :py:mod:`~mlos_bench.tunables.tunable_groups.TunableGroups` instances.

    These can be used to specify the values of the tunables for a given experiment
    using the :py:class:`~mlos_bench.optimizers.one_shot_optimizer.OneShotOptimizer`
    for instance.
    """

    UNIFIED = path_join(CONFIG_SCHEMA_DIR, "mlos-bench-config-schema.json")
    """
    Combined global json `schema
    <https://github.com/microsoft/MLOS/tree/main/mlos_bench/mlos_bench/config/schemas/mlos-bench-config-schema.json>`__
    used to validate any ``mlos_bench`` config file (e.g., ``*.mlos.jsonc`` files).

    See Also
    --------
    <https://www.schemastore.org/json/>
    """

    @property
    def schema(self) -> dict:
        """Gets the schema object that this member's path refers to."""
        loaded = SCHEMA_STORE[self.value]
        # An empty schema would accept anything; treat it as a packaging error.
        assert loaded
        return loaded

    def validate(self, config: dict) -> None:
        """
        Checks the given config against the schema for this member.

        Validation is skipped (with a warning) when the skip-validation environment
        flag was set at import time.

        Parameters
        ----------
        config : dict
            The config to validate.

        Raises
        ------
        jsonschema.exceptions.ValidationError
            On validation failure.
        jsonschema.exceptions.SchemaError
            On schema loading error.
        """
        if _SKIP_VALIDATION:
            _LOG.warning("%s is set - skip schema validation", VALIDATION_ENV_FLAG)
            return
        # The registry lets the validator look up the other schemas in the store.
        validator = jsonschema.Draft202012Validator(
            schema=self.schema,
            registry=SCHEMA_STORE.registry,
        )
        validator.validate(config)