Source code for olive.azureml.azureml_client

# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import logging
from pathlib import Path
from typing import Any, Dict, Optional

from olive.common.config_utils import ConfigBase
from olive.common.pydantic_v1 import Field, validator
from olive.common.utils import get_credentials

logger = logging.getLogger(__name__)


class AzureMLClientConfig(ConfigBase):
    """Configuration for AzureMLClient.

    This class is used to create an MLClient instance for AzureML operations. Some fields like `read_timeout`,
    `max_operation_retries`, `operation_retry_interval` are used to control the behavior of azureml operations
    like resource creation or download.

    The workspace is identified either by `aml_config_path` (a config file on disk) or by the
    `subscription_id` / `resource_group` / `workspace_name` triple.
    """

    # The four workspace-identifying fields all default to None, hence Optional.
    subscription_id: Optional[str] = Field(
        None, description="Azure subscription id. Required if aml_config_path is not provided."
    )
    resource_group: Optional[str] = Field(
        None, description="Azure resource group. Required if aml_config_path is not provided."
    )
    workspace_name: Optional[str] = Field(
        None, description="Azure workspace name. Required if aml_config_path is not provided."
    )
    aml_config_path: Optional[str] = Field(
        None, description="Path to AzureML config file. If provided, other fields are ignored."
    )
    # Read timeout in seconds for HTTP requests; users can increase it if they find the default value too small.
    # The default value from the azureml sdk is 3000, which is too large and causes the evaluations and pass runs
    # to sometimes hang for a long time between retries of the job stream and download steps.
    read_timeout: int = Field(60, description="Read timeout in seconds for HTTP requests.")
    max_operation_retries: int = Field(
        3, description="Max number of retries for AzureML operations like resource creation or download."
    )
    operation_retry_interval: int = Field(
        5,
        description=(
            "Initial interval in seconds between retries for AzureML operations like resource creation or download. The"
            " interval doubles after each retry."
        ),
    )
    # As DefaultAzureCredential is used by default, we need to provide the default auth config for it.
    # But DefaultAzureCredential accepts kwargs as parameters, so it is hard to validate the config.
    # We therefore just accept a dict here and let the user provide the correct config following the doc.
    default_auth_params: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Default auth config for AzureML client. Please refer to"
            " https://learn.microsoft.com/en-us/python/api/azure-identity/"
            "azure.identity.defaultazurecredential?view=azure-python#parameters"
            " for more details."
        ),
    )
    keyvault_name: Optional[str] = Field(
        None,
        description="Name of the keyvault to use. If provided, the keyvault will be used to retrieve secrets.",
    )

    @validator("aml_config_path", always=True)
    def validate_aml_config_path(cls, v, values):
        """Check that `aml_config_path`, when given, points to an existing regular file.

        Returns the value unchanged. Raises ValueError if the path does not exist or is not a file.
        A value of None is accepted as-is.
        """
        if v is None:
            return v

        config_path = Path(v)
        if not config_path.exists():
            raise ValueError(f"aml_config_path {v} does not exist")
        if not config_path.is_file():
            raise ValueError(f"aml_config_path {v} is not a file")
        return v

    def get_workspace_config(self) -> Dict[str, str]:
        """Get the workspace config as a dict.

        When `aml_config_path` is set, the dict is whatever JSON document that file contains. Otherwise it is
        built from the `subscription_id`, `resource_group` and `workspace_name` fields of this config; no
        check is made here that those fields are set, so their values may be None.
        """
        if self.aml_config_path:
            # JSON text is UTF-8; be explicit so decoding does not depend on the platform locale.
            with open(self.aml_config_path, encoding="utf-8") as f:
                return json.load(f)

        return {
            "subscription_id": self.subscription_id,
            "resource_group": self.resource_group,
            "workspace_name": self.workspace_name,
        }

    def create_client(self):
        """Create an MLClient instance for the configured workspace.

        With `aml_config_path` set, the client is built through `MLClient.from_config` using that path.
        Otherwise it is built from `subscription_id`, `resource_group` and `workspace_name`, all of which
        must then be provided. In both cases `read_timeout` is forwarded to the client and the credential
        comes from `get_credentials(self.default_auth_params)`.

        Raises ValueError if `aml_config_path` is None and any of the three workspace fields is None.
        """
        from azure.ai.ml import MLClient

        set_azure_logging_if_noset()

        if self.aml_config_path is not None:
            return MLClient.from_config(
                credential=get_credentials(self.default_auth_params),
                path=self.aml_config_path,
                read_timeout=self.read_timeout,
            )

        # Checked in this order so the first missing field is the one reported.
        for required_field in ("subscription_id", "resource_group", "workspace_name"):
            if getattr(self, required_field) is None:
                raise ValueError(f"{required_field} must be provided if aml_config_path is not provided")

        return MLClient(
            credential=get_credentials(self.default_auth_params),
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group,
            workspace_name=self.workspace_name,
            read_timeout=self.read_timeout,
        )

    def create_registry_client(self, registry_name: str):
        """Create an MLClient instance scoped to the registry named `registry_name`.

        Only the credential (from `get_credentials(self.default_auth_params)`) and the registry name are
        passed to MLClient; the workspace fields and `read_timeout` are not used here.
        """
        from azure.ai.ml import MLClient

        set_azure_logging_if_noset()

        return MLClient(credential=get_credentials(self.default_auth_params), registry_name=registry_name)
def set_azure_logging_if_noset():
    """Raise the Azure SDK loggers to ERROR unless a level was already chosen for them.

    Applies to the ``azure.ai.ml`` and ``azure.identity`` loggers, to avoid too many logs from the
    azure sdk. A logger whose own level is already set is left untouched.
    """
    for sdk_logger_name in ("azure.ai.ml", "azure.identity"):
        sdk_logger = logging.getLogger(sdk_logger_name)
        # NOTSET means nobody configured this logger explicitly; only then is it safe to override
        # without changing a level set by the user.
        if sdk_logger.level == logging.NOTSET:
            sdk_logger.setLevel(logging.ERROR)