Coverage for mlos_bench/mlos_bench/config/schemas/config_schemas.py: 92%
73 statements
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-06 00:35 +0000
« prev ^ index » next coverage.py v7.5.1, created at 2024-05-06 00:35 +0000
1#
2# Copyright (c) Microsoft Corporation.
3# Licensed under the MIT License.
4#
5"""
6A simple class for describing where to find different config schemas and validating configs against them.
7"""
9import logging
10from enum import Enum
11from os import path, walk, environ
12from typing import Dict, Iterator, Mapping
14import json # schema files are pure json - no comments
15import jsonschema
17from referencing import Registry, Resource
18from referencing.jsonschema import DRAFT202012
20from mlos_bench.util import path_join
_LOG = logging.getLogger(__name__)

# Root directory that is searched for config schema files; it is derived
# from the directory that contains this module.
CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True)

# Escape hatch for tight development cycles: when the environment variable
# named below holds one of the accepted "true" spellings (case-insensitive),
# `ConfigSchema.validate()` logs a warning instead of validating.
# NOTE: tests that expect invalid configs to raise may fail while it is set.
_VALIDATION_ENV_FLAG = 'MLOS_BENCH_SKIP_SCHEMA_VALIDATION'
_SKIP_VALIDATION = environ.get(_VALIDATION_ENV_FLAG, 'false').lower() in {
    'true', 'y', 'yes', 'on', '1',
}
36# Note: we separate out the SchemaStore from a class method on ConfigSchema
37# because of issues with mypy/pylint and non-Enum-member class members.
class SchemaStore(Mapping):
    """
    A lazily populated, read-only mapping of schema key to parsed JSON schema.

    Each schema file found under ``CONFIG_SCHEMA_DIR`` is stored twice: once
    under the path it was loaded from and once under its own ``$id``.
    The backing store and the registry are class-level attributes that are
    filled in by classmethods on first use.
    """

    # A class member mapping of schema id (or schema file path) to schema object.
    _SCHEMA_STORE: Dict[str, dict] = {}
    # The same schemas wrapped as `referencing` resources; built on first use.
    _REGISTRY: Registry = Registry()

    def __len__(self) -> int:
        """Returns the number of keys in the store, loading the schemas first if needed."""
        # Load lazily here too, so that len() agrees with __getitem__.
        self._load_schemas()
        return len(self._SCHEMA_STORE)

    def __iter__(self) -> Iterator:
        """Iterates over the keys in the store, loading the schemas first if needed."""
        # Load lazily here too, so that keys()/items()/values() are not
        # empty just because no lookup has happened yet.
        self._load_schemas()
        return iter(self._SCHEMA_STORE)

    def __getitem__(self, key: str) -> dict:
        """
        Gets the schema object for the given key.

        Parameters
        ----------
        key : str
            Either the path of a schema file or the ``$id`` of a schema.

        Returns
        -------
        schema : dict
            The parsed schema.

        Raises
        ------
        KeyError
            If no schema is stored under the given key.
        """
        self._load_schemas()
        return self._SCHEMA_STORE[key]

    @classmethod
    def _load_schemas(cls) -> None:
        """
        Loads all schemas and subschemas into the schema store for the validator to reference.

        Does nothing when the store is already populated. Otherwise walks
        ``CONFIG_SCHEMA_DIR`` and parses every non-empty ``*.json`` file.

        Raises
        ------
        AssertionError
            If a schema has no ``$id`` or its ``$id`` is already in use.
        """
        if cls._SCHEMA_STORE:
            return
        # Stage everything in a local dict and publish it in one step at the
        # end: if any file fails to parse or violates an assertion, the shared
        # store stays empty instead of being left half-populated (which the
        # guard above would then mistake for "fully loaded").
        loaded: Dict[str, dict] = {}
        for root, _, files in walk(CONFIG_SCHEMA_DIR):
            for file_name in files:
                if not file_name.endswith(".json"):
                    continue
                file_path = path_join(root, file_name)
                if path.getsize(file_path) == 0:
                    continue
                with open(file_path, mode="r", encoding="utf-8") as schema_file:
                    schema = json.load(schema_file)
                loaded[file_path] = schema
                # Let the schema be referenced by its id as well.
                assert "$id" in schema, f"Schema is missing '$id': {file_path}"
                assert schema["$id"] not in loaded, \
                    f"Duplicate schema '$id' {schema['$id']} in {file_path}"
                loaded[schema["$id"]] = schema
        # Update in place so the identity of the class-level dict is preserved.
        cls._SCHEMA_STORE.update(loaded)

    @classmethod
    def _load_registry(cls) -> None:
        """Also store them in a Registry object for referencing by recent versions of jsonschema."""
        if not cls._SCHEMA_STORE:
            cls._load_schemas()
        # Every key of the store (file path and `$id` alike) becomes a
        # resource URL in the registry.
        cls._REGISTRY = Registry().with_resources([
            (url, Resource.from_contents(schema, default_specification=DRAFT202012))
            for url, schema in cls._SCHEMA_STORE.items()
        ])

    @property
    def registry(self) -> Registry:
        """Returns a Registry object with all the schemas loaded."""
        # A falsy registry is treated as "not built yet".
        # NOTE(review): this relies on an empty Registry being falsy - confirm.
        if not self._REGISTRY:
            self._load_registry()
        return self._REGISTRY
# Module-level store instance used by `ConfigSchema` for schema lookups
# and for the validator's registry.
SCHEMA_STORE = SchemaStore()
class ConfigSchema(Enum):
    """
    Enumerates the known config schema types.

    The value of each member is the path of its schema file, which is also the
    key under which the schema is looked up in ``SCHEMA_STORE``.
    """

    CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json")
    GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json")
    ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json")
    OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json")
    SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json")
    SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json")
    STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json")
    TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json")
    TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json")

    UNIFIED = path_join(CONFIG_SCHEMA_DIR, "mlos-bench-config-schema.json")

    @property
    def schema(self) -> dict:
        """Returns the parsed schema that belongs to this member."""
        found = SCHEMA_STORE[self.value]
        assert found
        return found

    def validate(self, config: dict) -> None:
        """
        Checks the given config against the schema of this member.

        When the skip-validation flag is set, only a warning is logged and the
        config is not checked.

        Parameters
        ----------
        config : dict
            The config to validate.

        Raises
        ------
        jsonschema.exceptions.ValidationError
        jsonschema.exceptions.SchemaError
        """
        if _SKIP_VALIDATION:
            _LOG.warning("%s is set - skip schema validation", _VALIDATION_ENV_FLAG)
            return
        validator = jsonschema.Draft202012Validator(
            schema=self.schema,
            registry=SCHEMA_STORE.registry,
        )
        validator.validate(config)