autogen_ext.models.azure#

class AzureAIChatCompletionClient(**kwargs: Unpack[AzureAIChatCompletionClientConfig])[source]#

Bases: ChatCompletionClient

Chat completion client for models hosted on Azure AI Foundry or GitHub Models.

Parameters:
  • endpoint (str) – The endpoint to use. Required.

  • credential (Union[AzureKeyCredential, AsyncTokenCredential]) – The credential to use. Required.

  • model_info (ModelInfo) – The model family and capabilities of the model. Required.

  • model (str) – The name of the model. Required if the model is hosted on GitHub Models.

  • frequency_penalty (float, optional)

  • presence_penalty (float, optional)

  • temperature (float, optional)

  • top_p (float, optional)

  • max_tokens (int, optional)

  • response_format (Literal["text", "json_object"], optional)

  • stop (List[str], optional)

  • tools (List[ChatCompletionsToolDefinition], optional)

  • tool_choice (Union[str, ChatCompletionsToolChoicePreset, ChatCompletionsNamedToolChoice], optional)

  • seed (int, optional)

  • model_extras (Dict[str, Any], optional)

To use this client, you must install the azure extra:

pip install "autogen-ext[azure]"

The following code snippet shows how to use the client with GitHub Models:

import asyncio
import os
from azure.core.credentials import AzureKeyCredential
from autogen_ext.models.azure import AzureAIChatCompletionClient
from autogen_core.models import UserMessage


async def main():
    client = AzureAIChatCompletionClient(
        model="Phi-4",
        endpoint="https://models.inference.ai.azure.com",
        # To authenticate with the model you will need to generate a personal access token (PAT) in your GitHub settings.
        # Create your PAT token by following instructions here: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens
        credential=AzureKeyCredential(os.environ["GITHUB_TOKEN"]),
        model_info={
            "json_output": False,
            "function_calling": False,
            "vision": False,
            "family": "unknown",
            "structured_output": False,
        },
    )

    result = await client.create([UserMessage(content="What is the capital of France?", source="user")])
    print(result)

    # Close the client.
    await client.close()


if __name__ == "__main__":
    asyncio.run(main())

To stream the output, use the create_stream method:

import asyncio
import os

from autogen_core.models import UserMessage
from autogen_ext.models.azure import AzureAIChatCompletionClient
from azure.core.credentials import AzureKeyCredential


async def main():
    client = AzureAIChatCompletionClient(
        model="Phi-4",
        endpoint="https://models.inference.ai.azure.com",
        # To authenticate with the model you will need to generate a personal access token (PAT) in your GitHub settings.
        # Create your PAT token by following instructions here: https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens
        credential=AzureKeyCredential(os.environ["GITHUB_TOKEN"]),
        model_info={
            "json_output": False,
            "function_calling": False,
            "vision": False,
            "family": "unknown",
            "structured_output": False,
        },
    )

    # Create a stream.
    stream = client.create_stream([UserMessage(content="Write a poem about the ocean", source="user")])
    async for chunk in stream:
        print(chunk, end="", flush=True)
    print()

    # Close the client.
    await client.close()


if __name__ == "__main__":
    asyncio.run(main())
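
Because credential also accepts an AsyncTokenCredential, you can authenticate with Microsoft Entra ID when the model is hosted on Azure AI Foundry. The following is a minimal sketch, assuming the azure-identity package is installed and a deployed endpoint; the endpoint URL is a placeholder:

import asyncio

from autogen_core.models import UserMessage
from autogen_ext.models.azure import AzureAIChatCompletionClient
from azure.identity.aio import DefaultAzureCredential


async def main():
    # DefaultAzureCredential is an AsyncTokenCredential; it resolves Entra ID
    # credentials from the environment, the Azure CLI, or a managed identity.
    credential = DefaultAzureCredential()
    client = AzureAIChatCompletionClient(
        endpoint="https://<your-deployment>.<region>.models.ai.azure.com",  # placeholder endpoint
        credential=credential,
        model_info={
            "json_output": False,
            "function_calling": False,
            "vision": False,
            "family": "unknown",
            "structured_output": False,
        },
    )

    result = await client.create([UserMessage(content="What is the capital of France?", source="user")])
    print(result)

    # Close the client and the credential.
    await client.close()
    await credential.close()


if __name__ == "__main__":
    asyncio.run(main())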
actual_usage() → RequestUsage[source]#

add_usage(usage: RequestUsage) → None[source]#

property capabilities: ModelInfo#

async close() → None[source]#

count_tokens(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = []) → int[source]#

async create(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = [], json_output: bool | type[BaseModel] | None = None, extra_create_args: Mapping[str, Any] = {}, cancellation_token: CancellationToken | None = None) → CreateResult[source]#

Creates a single response from the model.

Parameters:
  • messages (Sequence[LLMMessage]) – The messages to send to the model.

  • tools (Sequence[Tool | ToolSchema], optional) – The tools to use with the model. Defaults to [].

  • json_output (Optional[bool | type[BaseModel]], optional) – Whether to use JSON mode, structured output, or neither. Defaults to None. If set to a Pydantic BaseModel type, it will be used as the output type for structured output. If set to a boolean, it will be used to determine whether to use JSON mode or not. If set to True, make sure to instruct the model to produce JSON output in the instruction or prompt.

  • extra_create_args (Mapping[str, Any], optional) – Extra arguments to pass to the underlying client. Defaults to {}.

  • cancellation_token (Optional[CancellationToken], optional) – A token for cancellation. Defaults to None.

Returns:

CreateResult – The result of the model call.
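
As an illustration, json_output can be set to a Pydantic model type to request structured output. The sketch below is hypothetical: it assumes a client constructed as in the examples above, with a model and a model_info that declare "json_output": True and "structured_output": True:

from autogen_core.models import UserMessage
from pydantic import BaseModel


class CityInfo(BaseModel):
    city: str
    country: str


async def structured_output_example(client):
    # `client` is an AzureAIChatCompletionClient whose model_info declares
    # "json_output": True and "structured_output": True (hypothetical setup).
    result = await client.create(
        [UserMessage(content="Where is the Eiffel Tower?", source="user")],
        json_output=CityInfo,  # parse the response into CityInfo
    )
    print(result.content)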

async create_stream(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = [], json_output: bool | type[BaseModel] | None = None, extra_create_args: Mapping[str, Any] = {}, cancellation_token: CancellationToken | None = None) → AsyncGenerator[str | CreateResult, None][source]#

Creates a stream of string chunks from the model ending with a CreateResult.

Parameters:
  • messages (Sequence[LLMMessage]) – The messages to send to the model.

  • tools (Sequence[Tool | ToolSchema], optional) – The tools to use with the model. Defaults to [].

  • json_output (Optional[bool | type[BaseModel]], optional) – Whether to use JSON mode, structured output, or neither. Defaults to None. If set to a Pydantic BaseModel type, it will be used as the output type for structured output. If set to a boolean, it will be used to determine whether to use JSON mode or not. If set to True, make sure to instruct the model to produce JSON output in the instruction or prompt.

  • extra_create_args (Mapping[str, Any], optional) – Extra arguments to pass to the underlying client. Defaults to {}.

  • cancellation_token (Optional[CancellationToken], optional) – A token for cancellation. Defaults to None.

Returns:

AsyncGenerator[Union[str, CreateResult], None] – A generator that yields string chunks and ends with a CreateResult.
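
Since the final item yielded by the generator is the CreateResult, you can separate it from the intermediate text chunks. A minimal sketch, assuming a client constructed as in the examples above:

from autogen_core.models import CreateResult, UserMessage


async def stream_with_final_result(client):
    final_result = None
    async for item in client.create_stream([UserMessage(content="Write a haiku.", source="user")]):
        if isinstance(item, CreateResult):
            final_result = item  # the last item: full message plus usage
        else:
            print(item, end="", flush=True)  # intermediate string chunk
    print()
    if final_result is not None:
        print(final_result.usage)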

property model_info: ModelInfo#

remaining_tokens(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = []) → int[source]#

total_usage() → RequestUsage[source]#
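
The token and usage helpers above can be combined to track consumption. A minimal sketch, assuming a client constructed as in the examples above:

from autogen_core.models import UserMessage


async def usage_example(client):
    messages = [UserMessage(content="Hello!", source="user")]
    print(client.count_tokens(messages))  # estimated prompt tokens
    print(client.remaining_tokens(messages))  # tokens left under the model's context limit
    await client.create(messages)
    print(client.actual_usage())  # usage reported for the most recent request
    print(client.total_usage())  # cumulative usage across all requests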
class AzureAIChatCompletionClientConfig[source]#

Bases: dict

credential: AzureKeyCredential | AsyncTokenCredential#
endpoint: str#
frequency_penalty: float | None#
max_tokens: int | None#
model: str | None#
model_extras: Dict[str, Any] | None#
model_info: ModelInfo#
presence_penalty: float | None#
response_format: Literal['text', 'json_object'] | None#
seed: int | None#
stop: List[str] | None#
temperature: float | None#
tool_choice: str | ChatCompletionsToolChoicePreset | ChatCompletionsNamedToolChoice | None#
tools: List[ChatCompletionsToolDefinition] | None#
top_p: float | None#
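
These fields map one-to-one to the keyword arguments of AzureAIChatCompletionClient. A sketch with illustrative sampling options; the endpoint, key, and values are placeholders:

from autogen_ext.models.azure import AzureAIChatCompletionClient
from azure.core.credentials import AzureKeyCredential

client = AzureAIChatCompletionClient(
    endpoint="https://models.inference.ai.azure.com",
    credential=AzureKeyCredential("<your-key>"),  # placeholder key
    model="Phi-4",
    model_info={
        "json_output": False,
        "function_calling": False,
        "vision": False,
        "family": "unknown",
        "structured_output": False,
    },
    temperature=0.7,  # sampling temperature
    top_p=0.95,  # nucleus sampling cutoff
    max_tokens=512,  # cap on generated tokens
    stop=["\n\n"],  # stop generation at a blank line
    seed=42,  # best-effort reproducibility
)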