Source code for olive.azureml.azureml_client
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import json
import logging
from pathlib import Path
from typing import Any, Dict, Optional
from olive.common.config_utils import ConfigBase
from olive.common.pydantic_v1 import Field, validator
from olive.common.utils import get_credentials
logger = logging.getLogger(__name__)
[docs]
class AzureMLClientConfig(ConfigBase):
    """Configuration for AzureMLClient.

    This class is used to create an MLClient instance for AzureML operations.
    The workspace is identified either by `aml_config_path` or by the
    `subscription_id`, `resource_group` and `workspace_name` fields.

    Some fields like `read_timeout`, `max_operation_retries`, `operation_retry_interval` are used to control the
    behavior of azureml operations like resource creation or download.
    """

    # The workspace identifiers all default to None, so they are annotated as Optional to match
    # the values they can actually hold.
    subscription_id: Optional[str] = Field(
        None, description="Azure subscription id. Required if aml_config_path is not provided."
    )
    resource_group: Optional[str] = Field(
        None, description="Azure resource group. Required if aml_config_path is not provided."
    )
    workspace_name: Optional[str] = Field(
        None, description="Azure workspace name. Required if aml_config_path is not provided."
    )
    aml_config_path: Optional[str] = Field(
        None, description="Path to AzureML config file. If provided, other fields are ignored."
    )
    # Read timeout in seconds for HTTP requests; users can increase it if they find the default value too small.
    # The default value from the azureml sdk is 3000, which is too large and causes the evaluations and pass runs
    # to sometimes hang for a long time between retries of job stream and download steps.
    read_timeout: int = Field(60, description="Read timeout in seconds for HTTP requests.")
    max_operation_retries: int = Field(
        3, description="Max number of retries for AzureML operations like resource creation or download."
    )
    operation_retry_interval: int = Field(
        5,
        description=(
            "Initial interval in seconds between retries for AzureML operations like resource creation or download. The"
            " interval doubles after each retry."
        ),
    )
    # As DefaultAzureCredential is used by default, we need to provide the default auth config for it.
    # DefaultAzureCredential accepts kwargs as parameters, which makes the config hard to validate,
    # so we just accept a dict here and let the user provide the correct config following the doc.
    default_auth_params: Optional[Dict[str, Any]] = Field(
        None,
        description=(
            "Default auth config for AzureML client. Please refer to"
            " https://learn.microsoft.com/en-us/python/api/azure-identity/"
            "azure.identity.defaultazurecredential?view=azure-python#parameters"
            " for more details."
        ),
    )
    keyvault_name: Optional[str] = Field(
        None,
        description="Name of the keyvault to use. If provided, the keyvault will be used to retrieve secrets.",
    )

    @validator("aml_config_path", always=True)
    def validate_aml_config_path(cls, v, values):
        """Check that `aml_config_path`, when provided, points to an existing file.

        A value of None is accepted unchanged. Raises ValueError if the path does not exist
        or is not a file.
        """
        if v is not None:
            config_path = Path(v)
            if not config_path.exists():
                raise ValueError(f"aml_config_path {v} does not exist")
            if not config_path.is_file():
                raise ValueError(f"aml_config_path {v} is not a file")
        return v

    def get_workspace_config(self) -> Dict[str, str]:
        """Get the workspace config as a dict.

        If `aml_config_path` is set, the parsed JSON content of that file is returned as is.
        Otherwise the dict is built from the `subscription_id`, `resource_group` and
        `workspace_name` fields of this config.
        """
        if self.aml_config_path:
            # JSON documents are UTF-8 encoded; do not depend on the platform's locale encoding.
            with open(self.aml_config_path, encoding="utf-8") as f:
                return json.load(f)

        # No config file was given, so report the values stored on this object.
        return {
            "subscription_id": self.subscription_id,
            "resource_group": self.resource_group,
            "workspace_name": self.workspace_name,
        }

    def create_client(self):
        """Create an MLClient instance for the configured workspace.

        When `aml_config_path` is set, the client is created from that config file. Otherwise it
        is created from `subscription_id`, `resource_group` and `workspace_name`. In both cases
        the credential is obtained from `get_credentials(default_auth_params)` and `read_timeout`
        is forwarded to the client.

        Raises ValueError if `aml_config_path` is not set and any of `subscription_id`,
        `resource_group` or `workspace_name` is None.
        """
        # Imported lazily so that this module can be imported without azure-ai-ml being loaded.
        from azure.ai.ml import MLClient

        set_azure_logging_if_noset()

        if self.aml_config_path is not None:
            return MLClient.from_config(
                credential=get_credentials(self.default_auth_params),
                path=self.aml_config_path,
                read_timeout=self.read_timeout,
            )

        # Without a config file, every workspace identifier is mandatory. They are checked in
        # this order so that the first missing one is the one that gets reported.
        for field_name in ("subscription_id", "resource_group", "workspace_name"):
            if getattr(self, field_name) is None:
                raise ValueError(f"{field_name} must be provided if aml_config_path is not provided")

        return MLClient(
            credential=get_credentials(self.default_auth_params),
            subscription_id=self.subscription_id,
            resource_group_name=self.resource_group,
            workspace_name=self.workspace_name,
            read_timeout=self.read_timeout,
        )

    def create_registry_client(self, registry_name: str):
        """Create an MLClient instance for the registry called `registry_name`.

        The credential is obtained from `get_credentials(default_auth_params)`. The workspace
        fields of this config are not passed to the client.
        """
        # Imported lazily so that this module can be imported without azure-ai-ml being loaded.
        from azure.ai.ml import MLClient

        set_azure_logging_if_noset()

        return MLClient(credential=get_credentials(self.default_auth_params), registry_name=registry_name)
def set_azure_logging_if_noset():
    """Quiet the azure sdk loggers unless a level has already been chosen for them.

    The "azure.ai.ml" and "azure.identity" loggers are set to ERROR to avoid too many logs
    from the azure sdk. A logger whose level is already set is left untouched, to avoid
    changing the level set by the user.
    """
    for sdk_logger_name in ("azure.ai.ml", "azure.identity"):
        sdk_logger = logging.getLogger(sdk_logger_name)
        # a level of 0 (NOTSET) means nobody has configured this logger explicitly
        if sdk_logger.level:
            continue
        sdk_logger.setLevel(logging.ERROR)