# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
import copy
import json
import logging
import shutil
import tempfile
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Union
import docker
from docker.errors import BuildError, ContainerError
import olive.systems.docker.utils as docker_utils
from olive.common.config_utils import ParamCategory, validate_config
from olive.evaluator.metric_result import MetricResult
from olive.hardware import Device
from olive.model import ModelConfig
from olive.systems.common import AcceleratorConfig, LocalDockerConfig, SystemType
from olive.systems.olive_system import OliveSystem
from olive.systems.system_config import DockerTargetUserConfig
if TYPE_CHECKING:
    # These names are only referenced inside string annotations in this module, so they are
    # imported for static type checkers only and never at runtime.
    from olive.evaluator.metric import Metric
    from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
    from olive.hardware.accelerator import AcceleratorSpec
    from olive.passes import Pass

# Module-level logger shared by DockerSystem and the helper functions below.
logger = logging.getLogger(__name__)
class DockerSystem(OliveSystem):
    """Olive system that runs passes and model evaluations inside Docker containers.

    On construction, the image named in the docker config is looked up through the Docker client
    and, if it is not found, built from a temporary build context. Every pass run or evaluation
    starts a detached container from that image, with the scripts, config, model, data and output
    directory passed to it as volumes.
    """

    system_type = SystemType.Docker

    # File name of the fallback Dockerfile; it is resolved relative to this module's directory.
    BASE_DOCKERFILE = "Dockerfile"

    def __init__(
        self,
        local_docker_config: Union[Dict[str, Any], LocalDockerConfig],
        accelerators: Optional[List[AcceleratorConfig]] = None,
        is_dev: bool = False,
        hf_token: Optional[bool] = None,
        requirements_file: Optional[Union[Path, str]] = None,
        **kwargs,  # used to hold the rest of the arguments not used by dockersystem.
    ):
        """Create the Docker client and make sure the configured image is available.

        Args:
            local_docker_config: Docker settings, either a ``LocalDockerConfig`` or a dict that
                validates into one.
            accelerators: Accelerator configs, forwarded to ``OliveSystem.__init__``.
            is_dev: When true, an additional dev mount is added to every container.
            hf_token: Forwarded to ``OliveSystem.__init__``.
            requirements_file: Optional requirements file copied into the build context as
                ``requirements.txt`` when the image has to be built.
            **kwargs: Extra arguments, forwarded to ``DockerTargetUserConfig`` only.

        Raises:
            ValueError: If ``local_docker_config`` is None, or if ``build_context_path``,
                ``dockerfile`` and ``requirements_file`` are all unset.
            docker.errors.BuildError: If the image has to be built and the build fails.
        """
        super().__init__(accelerators=accelerators, hf_token=hf_token)
        logger.info("Initializing Docker System...")
        self.is_dev = is_dev
        self.docker_client = docker.from_env()
        if local_docker_config is None:
            raise ValueError("local_docker_config cannot be None.")

        local_docker_config = validate_config(local_docker_config, LocalDockerConfig)
        if (
            not local_docker_config.build_context_path
            and not local_docker_config.dockerfile
            and not requirements_file
        ):
            raise ValueError("build_context_path, dockerfile and requirements_file cannot be None at the same time.")

        # NOTE: locals() captures every name bound in this frame so far (including `self` and
        # `kwargs`). Do not introduce new local variables above this line without checking that
        # DockerTargetUserConfig tolerates them.
        self.config = DockerTargetUserConfig(**locals(), **kwargs)
        self.run_params = local_docker_config.run_params

        try:
            self.image = self.docker_client.images.get(local_docker_config.image_name)
            logger.info("Image %s found", local_docker_config.image_name)
        except docker.errors.ImageNotFound:
            self.image = self._build_image(local_docker_config, requirements_file)

    def _build_image(self, local_docker_config: LocalDockerConfig, requirements_file: Optional[Union[Path, str]]):
        """Build the image described by ``local_docker_config`` in a temporary build context.

        Returns:
            The image object returned by the Docker client's ``images.build``.

        Raises:
            docker.errors.BuildError: If the build fails; the build log is logged at ERROR first.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            build_context_path = tempdir
            if local_docker_config.build_context_path and local_docker_config.dockerfile:
                # User-provided context: copy it so the original directory is never modified.
                dockerfile = local_docker_config.dockerfile
                dockerfile_path = Path(local_docker_config.build_context_path) / dockerfile
                shutil.copytree(local_docker_config.build_context_path, build_context_path, dirs_exist_ok=True)
            else:
                # Fall back to the Dockerfile that lives next to this module.
                dockerfile = self.BASE_DOCKERFILE
                dockerfile_path = Path(__file__).resolve().parent / self.BASE_DOCKERFILE
                shutil.copy2(dockerfile_path, build_context_path)

            requirements_dest = Path(build_context_path) / "requirements.txt"
            if requirements_file:
                shutil.copyfile(requirements_file, requirements_dest)
            elif not requirements_dest.exists():
                # Guarantee that a (possibly empty) requirements.txt exists in the build context.
                requirements_dest.touch()

            logger.info(
                "Building image from Dockerfile %s with buildargs %s ",
                dockerfile_path,
                local_docker_config.build_args,
            )
            try:
                image, build_logs = self.docker_client.images.build(
                    path=build_context_path,
                    dockerfile=dockerfile,
                    tag=local_docker_config.image_name,
                    buildargs=local_docker_config.build_args,
                )
            except BuildError as e:
                logger.exception("Image build failed with error.")
                _print_docker_logs(e.build_log, logging.ERROR)
                raise

            logger.info("Image %s build successfully.", local_docker_config.image_name)
            _print_docker_logs(build_logs, logging.DEBUG)
            return image

    def run_pass(
        self,
        the_pass: "Pass",
        model_config: "ModelConfig",
        output_model_path: str,
        point: Optional[Dict[str, Any]] = None,
    ) -> Optional["ModelConfig"]:
        """Run the pass on the model at a specific point in the search space.

        The container's working files live in a temporary directory that is deleted when this
        method returns; the output is copied to ``output_model_path`` before that.

        Returns:
            The output model config, or None if the container produced no result file.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            return self._run_pass_container(Path(tempdir), the_pass, model_config, output_model_path, point)

    def _run_pass_container(
        self,
        workdir: Path,
        the_pass: "Pass",
        model_config: "ModelConfig",
        output_model_path: str,
        point: Optional[Dict[str, Any]] = None,
    ) -> Optional["ModelConfig"]:
        """Assemble the mounts and runner config for a pass, run it in a container, collect the result.

        Args:
            workdir: Local scratch directory for the config file and the output mount.
            the_pass: Pass to run.
            model_config: Input model config.
            output_model_path: Local directory the container output is copied into.
            point: Search point; an empty dict is used when None.

        Returns:
            The output model config with container paths rewritten to local paths, or None if the
            runner result file was not produced.

        Raises:
            ValueError: If an output resource path is neither a mounted input resource nor located
                under the container output directory.
            docker.errors.ContainerError: If the container exits with a non-zero status.
        """
        point = point or {}
        config = the_pass.config_at_search_point(point)
        pass_config = the_pass.to_json(check_object=True)
        pass_config["config"].update(the_pass.serialize_config(config, check_object=True))

        volumes_list = []
        runner_output_path = "runner_output"
        runner_output_name = "runner_res.json"
        container_root_path = Path("/olive-ws/")

        # mount pass_runner script
        docker_runner_path, pass_runner_file_mount_str = docker_utils.create_runner_script_mount(container_root_path)
        volumes_list.append(pass_runner_file_mount_str)

        # mount dev stuff
        if self.is_dev:
            _, dev_mount_str = docker_utils.create_dev_mount(workdir, container_root_path)
            volumes_list.append(dev_mount_str)

        # mount model
        docker_model_files, model_mount_str_list, mount_model_to_local = docker_utils.create_model_mount(
            model_config=model_config, container_root_path=container_root_path
        )
        volumes_list.extend(model_mount_str_list)

        # data_dir or data_config
        docker_data_paths, data_mount_str_list = self._create_data_mounts_for_pass(container_root_path, the_pass)
        volumes_list.extend(data_mount_str_list)

        # mount config file
        data = self._create_runner_config(model_config, pass_config, docker_model_files, docker_data_paths)
        logger.debug("Runner config is %s", data)
        docker_config_file, config_file_mount_str = docker_utils.create_config_file(
            workdir=workdir,
            config=data,
            container_root_path=container_root_path,
        )
        volumes_list.append(config_file_mount_str)

        # output mount
        output_local_path, docker_output_path, output_mount_str = docker_utils.create_output_mount(
            workdir=workdir,
            docker_output_path=runner_output_path,
            container_root_path=container_root_path,
        )
        volumes_list.append(output_mount_str)
        logger.debug("The volumes list is %s", volumes_list)

        runner_command = docker_utils.create_runner_command(
            runner_script_path=docker_runner_path,
            config_path=docker_config_file,
            output_path=docker_output_path,
            output_name=runner_output_name,
        )
        model_output_json_file = self._run_container(
            runner_command, volumes_list, output_local_path, runner_output_name, the_pass.accelerator_spec
        )

        if not model_output_json_file.is_file():
            logger.error("Model output file %s not found.", model_output_json_file)
            return None

        with model_output_json_file.open() as f:
            model_output = json.load(f)
        output_model = ModelConfig.parse_obj(model_output)

        logger.debug("Copying model from %s to %s", output_local_path, output_model_path)
        shutil.copytree(output_local_path, output_model_path, dirs_exist_ok=True)
        logger.debug("mount_model_to_local: %s", mount_model_to_local)

        for resource_name, resource_str in output_model.get_resource_strings().items():
            if not resource_str:
                continue

            logger.debug("Resource %s path: %s", resource_name, resource_str)
            original_resource_path = mount_model_to_local.get(resource_str)
            if original_resource_path:
                # If the output model path is something like /olive-ws/model.onnx
                # we need replace with the original model path
                output_model.config[resource_name] = original_resource_path
                logger.info("Original resource path for %s is: %s", resource_str, original_resource_path)
                continue

            # output_local_path should be something like: /tmp/tmpd1sjw9xa/runner_output
            # If there are any output models, they will be saved in that path
            # and the output_model.config["model_path"] would like /olive-ws/runner_output/model.onnx
            # the model path should starts with /olive-ws/runner_output
            if not resource_str.startswith(docker_output_path):
                raise ValueError(
                    f"Resource {resource_name} path {resource_str} is not under the container output path"
                    f" {docker_output_path}"
                )
            # count=1 so that only the leading container prefix is rewritten, never a later
            # occurrence of the same text inside the path.
            output_model.config[resource_name] = resource_str.replace(docker_output_path, output_model_path, 1)

        logger.debug("Model path is: %s", output_model.config["model_path"])
        return output_model

    def evaluate_model(
        self,
        model_config: "ModelConfig",
        evaluator_config: "OliveEvaluatorConfig",
        accelerator: "AcceleratorSpec",
    ) -> Optional[MetricResult]:
        """Evaluate the model inside a container and parse the metric result it writes.

        Returns:
            The parsed ``MetricResult``, or None if the container produced no result file.

        Raises:
            docker.errors.ContainerError: If the container exits with a non-zero status.
        """
        container_root_path = Path("/olive-ws/")
        with tempfile.TemporaryDirectory() as tempdir:
            # Pass a Path, matching what run_pass hands to the same docker_utils helpers.
            metric_json = self._run_eval_container(
                Path(tempdir), model_config, evaluator_config, accelerator, container_root_path
            )
            # The result must be read before the temporary directory is cleaned up.
            if not metric_json.is_file():
                logger.error("Metric result file %s not found.", metric_json)
                return None

            with metric_json.open() as f:
                metrics_res = json.load(f)
            return MetricResult.parse_obj(metrics_res)

    def _run_eval_container(
        self,
        workdir: Union[Path, str],
        model_config: "ModelConfig",
        evaluator_config: "OliveEvaluatorConfig",
        accelerator: "AcceleratorSpec",
        container_root_path: Path,
    ):
        """Assemble the mounts and eval config, run the evaluation container, return the result path.

        Returns:
            Local path of the expected ``eval_res.json``; the file may not exist if the script
            wrote nothing.
        """
        eval_output_path = "eval_output"
        eval_output_name = "eval_res.json"
        volumes_list = []

        # mount eval script
        eval_file_mount_path, eval_file_mount_str = docker_utils.create_eval_script_mount(container_root_path)
        volumes_list.append(eval_file_mount_str)

        # mount dev stuff
        if self.is_dev:
            _, dev_mount_str = docker_utils.create_dev_mount(workdir, container_root_path)
            volumes_list.append(dev_mount_str)

        # mount model
        model_mounts, model_mount_str_list, _ = docker_utils.create_model_mount(
            model_config=model_config, container_root_path=container_root_path
        )
        volumes_list += model_mount_str_list

        # Work on a copy so the caller's evaluator config is left untouched.
        metrics_copy = copy.deepcopy(evaluator_config.metrics)

        # mount metrics related external files
        volumes_list.extend(
            # the metrics_copy is modified when creating the volumes list
            docker_utils.create_metric_volumes_list(
                metrics=metrics_copy,
                container_root_path=container_root_path,
            )
        )

        # mount config file
        data = self._create_eval_config(model_config, metrics_copy, model_mounts)
        config_mount_path, config_file_mount_str = docker_utils.create_config_file(
            workdir=workdir,
            config=data,
            container_root_path=container_root_path,
        )
        volumes_list.append(config_file_mount_str)

        # output mount
        output_local_path, output_mount_path, output_mount_str = docker_utils.create_output_mount(
            workdir=workdir,
            docker_output_path=eval_output_path,
            container_root_path=container_root_path,
        )
        volumes_list.append(output_mount_str)
        logger.debug("The volumes list is %s", volumes_list)

        eval_command = docker_utils.create_evaluate_command(
            eval_script_path=eval_file_mount_path,
            config_path=config_mount_path,
            output_path=output_mount_path,
            output_name=eval_output_name,
            accelerator=accelerator,
        )
        return self._run_container(eval_command, volumes_list, output_local_path, eval_output_name, accelerator)

    @staticmethod
    def _create_eval_config(model_config: "ModelConfig", metrics: List["Metric"], model_mounts: Dict[str, str]):
        """Build the eval config dict, with model config entries overridden by their mounted paths."""
        model_json = model_config.to_json(check_object=True)
        for k, v in model_mounts.items():
            model_json["config"][k] = v
        return {"metrics": [k.to_json(check_object=True) for k in metrics], "model": model_json}

    @staticmethod
    def _create_runner_config(
        model_config: "ModelConfig",
        pass_config: Dict[str, Any],
        model_mounts: Dict[str, str],
        data_mounts: Dict[str, str],
    ):
        """Build the runner config dict, with model and data entries overridden by their mounted paths.

        ``pass_config`` is deep-copied first, so the caller's dict is not modified.
        """
        model_json = model_config.to_json(check_object=True)
        for k, v in model_mounts.items():
            model_json["config"][k] = v

        pass_config_copy = copy.deepcopy(pass_config)
        for k, v in data_mounts.items():
            pass_config_copy["config"][k] = v
        return {"model": model_json, "pass": pass_config_copy}

    @staticmethod
    def _normalize_environment(environment) -> Dict[str, Any]:
        """Return a new dict for an environment given as a dict, a list of ``KEY=VALUE`` strings, or None.

        List entries are split on the first ``=`` only, so values that contain ``=`` are kept
        intact. An entry without ``=`` maps to None.
        """
        if not environment:
            return {}
        if isinstance(environment, dict):
            # Copy so the caller's run parameters are never mutated.
            return dict(environment)

        normalized = {}
        for entry in environment:
            key, sep, value = str(entry).partition("=")
            normalized[key] = value if sep else None
        return normalized

    def _run_container(
        self,
        command,
        volumes_list: List[str],
        output_local_path,
        output_name,
        accelerator: "AcceleratorSpec",
    ):
        """Run ``command`` in a detached container, stream its logs, and wait for it to exit.

        Args:
            command: Command to execute in the container.
            volumes_list: Volume strings to pass to the container.
            output_local_path: Local directory in which the output file is expected.
            output_name: File name of the expected output file.
            accelerator: Accelerator spec; a GPU device request is added when its type is GPU.

        Returns:
            ``Path(output_local_path) / output_name``. The file's existence is not checked here.

        Raises:
            docker.errors.ContainerError: If the container exits with a non-zero status; the
                collected container logs are included in the error.
        """
        run_command = docker_utils.create_run_command(run_params=self.run_params)
        environment = self._normalize_environment(run_command.pop("environment", {}))

        # Defaults are applied only when the user has not supplied a non-empty value.
        envs_dict = {"PYTHONPYCACHEPREFIX": "/tmp"}
        for k, v in envs_dict.items():
            if not environment.get(k):
                environment[k] = v

        if self.hf_token:
            token = get_huggingface_token()
            # get_huggingface_token returns None when no token is found; do not export that.
            if token:
                environment["HF_TOKEN"] = token

        log_level = logging.getLevelName(logger.getEffectiveLevel())
        environment["OLIVE_LOG_LEVEL"] = log_level

        logger.debug("Running container with command: %s", command)
        if accelerator.accelerator_type == Device.GPU:
            run_command["device_requests"] = [docker.types.DeviceRequest(capabilities=[["gpu"]])]

        container = self.docker_client.containers.run(
            image=self.image,
            command=command,
            volumes=volumes_list,
            detach=True,
            environment=environment,
            **run_command,
        )

        docker_logs = []
        try:
            for line in container.logs(stream=True):
                # containers.logs can accept stdout/stderr as arguments, but it doesn't work
                # as we cannot ensure that all the logs will be printed in the correct channel(out/err)
                # so, we collect all the logs and print them in the end if there is an error.
                log = line.decode(errors="replace").strip()
                logger.debug("%s", log)
                docker_logs.append(log)
            exit_code = container.wait()["StatusCode"]
        finally:
            # Always clean up, including when streaming or waiting is interrupted. force=True
            # lets the removal succeed even if the container is still running at that point.
            container.remove(force=True)

        if exit_code != 0:
            error_msg = "\n".join(docker_logs)
            raise ContainerError(
                container, exit_code, command, self.image, f"Docker container evaluation failed with: {error_msg}"
            )
        logger.debug("Docker container run completed successfully")
        return Path(output_local_path) / output_name

    def _create_data_mounts_for_pass(self, container_root_path: Path, the_pass: "Pass"):
        """Create container mounts for the pass's non-empty DATA path parameters.

        Returns:
            A tuple ``(mounts, mount_strs)``: ``mounts`` maps each parameter name to its path in
            the container, and ``mount_strs`` holds the matching ``"<local>:<container>"`` strings.
        """
        mounts = {}
        mount_strs = []
        for param, _, category in the_pass.path_params:
            param_val = the_pass.config.get(param)
            if category == ParamCategory.DATA and param_val:
                mount = str(container_root_path / param)
                mounts[param] = mount
                mount_strs.append(f"{param_val}:{mount}")
        return mounts, mount_strs

    def remove(self):
        """Force-remove this system's image through the Docker client."""
        # Resolve the reference before removal, and fall back to the image id so that an
        # untagged image does not raise IndexError.
        tags = self.image.tags
        image_ref = tags[0] if tags else self.image.id
        self.docker_client.images.remove(image_ref, force=True)
        logger.info("Image %s removed successfully.", image_ref)
def _print_docker_logs(logs, level=logging.DEBUG):
    """Emit Docker log entries as one multi-line record on the module logger.

    Args:
        logs: Iterable of log entries. For an entry that contains a ``"stream"`` key, only that
            value is used; any other entry is rendered with ``str()``.
        level: Logging level for the combined record.
    """
    rendered = [str(entry["stream"] if "stream" in entry else entry).strip() for entry in logs]
    logger.log(level, "\n".join(rendered))
def get_huggingface_token():
    """Get huggingface token from environment variable or token file.

    Lookup order, first hit wins:
        1. The ``HF_TOKEN`` environment variable.
        2. The legacy token file ``~/.huggingface/token``.
        3. The file named by the ``HF_TOKEN_PATH`` environment variable, if set.
        4. ``$HF_HOME/token``, where ``HF_HOME`` defaults to ``$XDG_CACHE_HOME/huggingface``
           and ``XDG_CACHE_HOME`` defaults to ``~/.cache``.

    Returns:
        The token with surrounding whitespace stripped, or None (after logging an error) when
        no token is found.
    """
    import os

    token = os.getenv("HF_TOKEN")
    if token:
        return token

    # The legacy location stays first so every setup that worked before resolves identically.
    candidates = [Path.home() / ".huggingface" / "token"]

    explicit_token_path = os.getenv("HF_TOKEN_PATH")
    if explicit_token_path:
        candidates.append(Path(explicit_token_path).expanduser())

    # Default location used by current huggingface_hub releases.
    cache_home = Path(os.getenv("XDG_CACHE_HOME") or Path.home() / ".cache").expanduser()
    hf_home = Path(os.getenv("HF_HOME") or cache_home / "huggingface").expanduser()
    candidates.append(hf_home / "token")

    for token_path in candidates:
        if token_path.is_file():
            return token_path.read_text(encoding="utf-8").strip()

    logger.error(
        "Huggingface token is required at this step. Could not find huggingface token at any of: %s. "
        "Please login to huggingface first using `huggingface-cli login`. "
        "If you already logged in, please make sure one of these token files exists "
        "or set the HF_TOKEN environment variable.",
        ", ".join(str(path) for path in candidates),
    )
    return None