Source code for autogen_core.models._model_client
from __future__ import annotations
from typing import Mapping, Optional, Sequence, runtime_checkable
from typing_extensions import (
    Any,
    AsyncGenerator,
    Protocol,
    Required,
    TypedDict,
    Union,
)
from .. import CancellationToken
from ..tools import Tool, ToolSchema
from ._types import CreateResult, LLMMessage, RequestUsage

class ModelCapabilities(TypedDict, total=False):
    vision: Required[bool]
    function_calling: Required[bool]
    json_output: Required[bool]
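
# Illustrative example (not part of the original module): a capabilities
# mapping for a hypothetical model without image input. All three keys are
# marked Required, so each must be present even though the TypedDict is
# declared with total=False.
example_capabilities: ModelCapabilities = {
    "vision": False,
    "function_calling": True,
    "json_output": True,
}
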
@runtime_checkable
class ChatCompletionClient(Protocol):
    # Caching has to be handled internally, as cached results can depend on the create args stored in the constructor.
    async def create(
        self,
        messages: Sequence[LLMMessage],
        tools: Sequence[Tool | ToolSchema] = [],
        # None means do not override the default.
        # A value overrides the client default, which is often specified in the constructor.
        json_output: Optional[bool] = None,
        extra_create_args: Mapping[str, Any] = {},
        cancellation_token: Optional[CancellationToken] = None,
    ) -> CreateResult: ...

    def create_stream(
        self,
        messages: Sequence[LLMMessage],
        tools: Sequence[Tool | ToolSchema] = [],
        # None means do not override the default.
        # A value overrides the client default, which is often specified in the constructor.
        json_output: Optional[bool] = None,
        extra_create_args: Mapping[str, Any] = {},
        cancellation_token: Optional[CancellationToken] = None,
    ) -> AsyncGenerator[Union[str, CreateResult], None]: ...

    def actual_usage(self) -> RequestUsage: ...

    def total_usage(self) -> RequestUsage: ...

    def count_tokens(self, messages: Sequence[LLMMessage], tools: Sequence[Tool | ToolSchema] = []) -> int: ...

    def remaining_tokens(self, messages: Sequence[LLMMessage], tools: Sequence[Tool | ToolSchema] = []) -> int: ...

    @property
    def capabilities(self) -> ModelCapabilities: ...
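
# Usage sketch (illustrative; not part of the original module). It assumes the
# caller supplies some concrete ChatCompletionClient implementation and a
# sequence of LLMMessage instances; `example_usage` is a hypothetical helper.
async def example_usage(client: ChatCompletionClient, messages: Sequence[LLMMessage]) -> None:
    # Consult the advertised capabilities before requesting JSON output;
    # passing None leaves the client's configured default untouched.
    json_output = True if client.capabilities["json_output"] else None
    result = await client.create(messages, json_output=json_output)
    # CreateResult carries the completion content; the client accumulates
    # token usage across calls, exposed via total_usage().
    print(result.content)
    print(client.total_usage())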