Coverage for mlos_bench/mlos_bench/config/schemas/config_schemas.py: 92%

73 statements  

« prev     ^ index     » next       coverage.py v7.5.1, created at 2024-05-06 00:35 +0000

1# 

2# Copyright (c) Microsoft Corporation. 

3# Licensed under the MIT License. 

4# 

5""" 

6A simple class for describing where to find different config schemas and validating configs against them. 

7""" 

8 

9import logging 

10from enum import Enum 

11from os import path, walk, environ 

12from typing import Dict, Iterator, Mapping 

13 

14import json # schema files are pure json - no comments 

15import jsonschema 

16 

17from referencing import Registry, Resource 

18from referencing.jsonschema import DRAFT202012 

19 

20from mlos_bench.util import path_join 

21 

# Module-level logger, named after this module.
_LOG = logging.getLogger(__name__)

# The path to find all config schemas.
# Derived from the directory that contains this module.
CONFIG_SCHEMA_DIR = path_join(path.dirname(__file__), abs_path=True)

# Allow skipping schema validation for tight dev cycle changes.
# The flag is read from the environment once, at import time, and is
# consulted by the `ConfigSchema.validate()` method below.
# NOTE: this may cause pytest to fail if it's expecting exceptions
# to be raised for invalid configs.
_VALIDATION_ENV_FLAG = 'MLOS_BENCH_SKIP_SCHEMA_VALIDATION'
# Any of the listed spellings (compared case-insensitively) enables the skip;
# an unset variable defaults to 'false', i.e., validation stays on.
_SKIP_VALIDATION = environ.get(_VALIDATION_ENV_FLAG, 'false').lower() in (
    'true', 'y', 'yes', 'on', '1',
)

34 

35 

36# Note: we separate out the SchemaStore from a class method on ConfigSchema 

37# because of issues with mypy/pylint and non-Enum-member class members. 

class SchemaStore(Mapping):
    """
    A simple class for storing schemas and subschemas for the validator to reference.

    The store behaves as a read-only mapping of schema key to schema object.
    Each schema is reachable under two keys: the path of the file it was loaded
    from and its "$id" value.

    Schemas are loaded lazily from CONFIG_SCHEMA_DIR on first access.
    Both the backing dict and the registry are class-level members,
    so every instance shares the same data.
    """

    # A class member mapping of schema id (or schema file path) to schema object.
    _SCHEMA_STORE: Dict[str, dict] = {}
    # A class member registry holding the same schemas for `jsonschema` to resolve references.
    _REGISTRY: Registry = Registry()

    def __len__(self) -> int:
        """Gets the number of keys in the store, loading the schemas first if needed."""
        # Load lazily here too, so that len()/keys()/items() agree with __getitem__.
        self._load_schemas()
        return len(self._SCHEMA_STORE)

    def __iter__(self) -> Iterator[str]:
        """Iterates over the keys in the store, loading the schemas first if needed."""
        # Load lazily here too, so that iteration agrees with __getitem__.
        self._load_schemas()
        return iter(self._SCHEMA_STORE)

    def __getitem__(self, key: str) -> dict:
        """
        Gets the schema object for the given key.

        Parameters
        ----------
        key : str
            Either the schema file path or the schema "$id".

        Returns
        -------
        schema : dict
            The parsed schema object.

        Raises
        ------
        KeyError
            If no schema is stored under the given key.
        """
        self._load_schemas()
        return self._SCHEMA_STORE[key]

    @classmethod
    def _load_schemas(cls) -> None:
        """
        Loads all schemas and subschemas into the schema store for the validator to reference.

        Walks CONFIG_SCHEMA_DIR and parses every non-empty "*.json" file found.
        Does nothing if the store is already populated.

        Raises
        ------
        AssertionError
            If a schema has no "$id" or its "$id" collides with an existing key.
        """
        if cls._SCHEMA_STORE:
            return
        # Stage everything in a local dict and publish it only once all files
        # have been processed. Otherwise a failure part-way through would leave
        # a partially filled store that later calls mistake for a complete one.
        loaded: Dict[str, dict] = {}
        for root, _, files in walk(CONFIG_SCHEMA_DIR):
            for file_name in files:
                if not file_name.endswith(".json"):
                    continue
                file_path = path_join(root, file_name)
                # Skip empty placeholder files: they are not valid json.
                if path.getsize(file_path) == 0:
                    continue
                with open(file_path, mode="r", encoding="utf-8") as schema_file:
                    schema = json.load(schema_file)
                loaded[file_path] = schema
                # Let the schema be referenced by its id as well.
                assert "$id" in schema, f"Schema file is missing '$id': {file_path}"
                assert schema["$id"] not in loaded, \
                    f"Duplicate schema '$id' {schema['$id']} in {file_path}"
                loaded[schema["$id"]] = schema
        # Update in place (rather than rebinding) to keep the dict object's identity.
        cls._SCHEMA_STORE.update(loaded)

    @classmethod
    def _load_registry(cls) -> None:
        """Also store them in a Registry object for referencing by recent versions of jsonschema."""
        if not cls._SCHEMA_STORE:
            cls._load_schemas()
        # Every store key (file path and "$id" alike) becomes a resource URI.
        cls._REGISTRY = Registry().with_resources([
            (url, Resource.from_contents(schema, default_specification=DRAFT202012))
            for url, schema in cls._SCHEMA_STORE.items()
        ])

    @property
    def registry(self) -> Registry:
        """Returns a Registry object with all the schemas loaded."""
        # NOTE(review): relies on an empty Registry being falsy - confirm against `referencing`.
        if not self._REGISTRY:
            self._load_registry()
        return self._REGISTRY


# The shared, module-level schema store instance.
SCHEMA_STORE = SchemaStore()

98 

99 

class ConfigSchema(Enum):
    """
    An enum to help describe schema types and help validate configs against them.

    The value of each member is the schema file path (built from CONFIG_SCHEMA_DIR),
    which doubles as the lookup key into SCHEMA_STORE.
    """

    CLI = path_join(CONFIG_SCHEMA_DIR, "cli/cli-schema.json")
    GLOBALS = path_join(CONFIG_SCHEMA_DIR, "cli/globals-schema.json")
    ENVIRONMENT = path_join(CONFIG_SCHEMA_DIR, "environments/environment-schema.json")
    OPTIMIZER = path_join(CONFIG_SCHEMA_DIR, "optimizers/optimizer-schema.json")
    SCHEDULER = path_join(CONFIG_SCHEMA_DIR, "schedulers/scheduler-schema.json")
    SERVICE = path_join(CONFIG_SCHEMA_DIR, "services/service-schema.json")
    STORAGE = path_join(CONFIG_SCHEMA_DIR, "storage/storage-schema.json")
    TUNABLE_PARAMS = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-params-schema.json")
    TUNABLE_VALUES = path_join(CONFIG_SCHEMA_DIR, "tunables/tunable-values-schema.json")

    UNIFIED = path_join(CONFIG_SCHEMA_DIR, "mlos-bench-config-schema.json")

    @property
    def schema(self) -> dict:
        """
        Gets the schema object for this type.

        Returns
        -------
        schema : dict
            The (non-empty) schema object stored under this member's value.
        """
        schema_obj = SCHEMA_STORE[self.value]
        assert schema_obj
        return schema_obj

    def validate(self, config: dict) -> None:
        """
        Validates the given config against this schema.

        Validation is skipped (with a warning) when the module-level
        skip flag was set from the environment.

        Parameters
        ----------
        config : dict
            The config to validate.

        Raises
        ------
        jsonschema.exceptions.ValidationError
        jsonschema.exceptions.SchemaError
        """
        if _SKIP_VALIDATION:
            _LOG.warning("%s is set - skip schema validation", _VALIDATION_ENV_FLAG)
            return
        # The registry lets the validator resolve references to the other stored schemas.
        validator = jsonschema.Draft202012Validator(
            schema=self.schema,
            registry=SCHEMA_STORE.registry,
        )
        validator.validate(config)

143 registry=SCHEMA_STORE.registry, 

144 ).validate(config)