autogen_ext.models.semantic_kernel#

class SKChatCompletionAdapter(sk_client: ChatCompletionClientBase, model_info: ModelInfo | None = None, service_id: str | None = None, default_prompt_settings: PromptExecutionSettings | None = None)[source]#

Bases: ChatCompletionClient

SKChatCompletionAdapter is an adapter that allows using Semantic Kernel model clients as Autogen ChatCompletion clients. This makes it possible to seamlessly integrate Semantic Kernel connectors (e.g., Azure OpenAI, Google Gemini, Ollama, etc.) into Autogen agents that rely on a ChatCompletionClient interface.

By leveraging this adapter, you can:

Wrap any supported Semantic Kernel ChatCompletionClientBase connector, and supply a Kernel with each request through the extra_create_args dictionary.

Provide tools (via Autogen Tool or ToolSchema) for function calls during chat completion.

Stream responses or retrieve them in a single request.

Provide prompt settings to control the chat completion behavior either globally through the constructor or on a per-request basis through the extra_create_args dictionary.

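Parameters:

sk_client (ChatCompletionClientBase) – The Semantic Kernel client to wrap (e.g., AzureChatCompletion, GoogleAIChatCompletion, OllamaChatCompletion).
default_prompt_settings (PromptExecutionSettings | None) – Optional prompt settings applied to every request unless overridden through the “prompt_execution_settings” key of extra_create_args.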

Example usage:

import asyncio
from semantic_kernel import Kernel
from semantic_kernel.memory.null_memory import NullMemory
from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion
from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import (
    AzureChatPromptExecutionSettings,
)
from semantic_kernel.connectors.ai.google.google_ai import GoogleAIChatCompletion
from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings
from autogen_core.models import SystemMessage, UserMessage, LLMMessage
from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter
from autogen_core import CancellationToken
from autogen_core.tools import BaseTool
from pydantic import BaseModel


# 1) Basic tool definition (for demonstration)
class CalculatorArgs(BaseModel):
    # Arguments accepted by the calculator tool: the two numbers to add.
    a: float  # first addend
    b: float  # second addend


class CalculatorResult(BaseModel):
    # Value returned by the calculator tool.
    result: float  # sum of the two arguments


class CalculatorTool(BaseTool[CalculatorArgs, CalculatorResult]):
    """Demonstration tool that adds two floating-point numbers.

    Takes a ``CalculatorArgs`` instance as input and produces a
    ``CalculatorResult`` holding the sum.
    """

    def __init__(self) -> None:
        """Configure the tool's name, description, and argument/return types."""
        super().__init__(
            name="calculator",
            description="Add two numbers together",
            args_type=CalculatorArgs,
            return_type=CalculatorResult,
        )

    async def run(self, args: CalculatorArgs, cancellation_token: CancellationToken) -> CalculatorResult:
        """Return the sum of ``args.a`` and ``args.b``.

        ``cancellation_token`` is accepted to satisfy the tool interface
        but is not consulted; the addition completes immediately.
        """
        total = args.a + args.b
        return CalculatorResult(result=total)


async def main() -> None:
    """Send the same chat request through three Semantic Kernel backed adapters.

    Builds an adapter for Azure OpenAI, Google Gemini, and Ollama, then calls
    ``create`` on each with the same messages and calculator tool, printing
    the content of every result. The angle-bracketed strings are placeholders
    that must be replaced with real deployment/endpoint/key values.
    """
    # 2) Create a Semantic Kernel instance (with null memory for simplicity)
    kernel = Kernel(memory=NullMemory())

    # ----------------------------------------------------------------
    # Example A: Azure OpenAI
    # ----------------------------------------------------------------
    deployment_name = "<AZURE_OPENAI_DEPLOYMENT_NAME>"
    endpoint = "<AZURE_OPENAI_ENDPOINT>"
    api_key = "<AZURE_OPENAI_API_KEY>"

    azure_client = AzureChatCompletion(deployment_name=deployment_name, endpoint=endpoint, api_key=api_key)
    azure_request_settings = AzureChatPromptExecutionSettings(temperature=0.8)
    # The settings are installed as the adapter-wide default here.
    azure_adapter = SKChatCompletionAdapter(sk_client=azure_client, default_prompt_settings=azure_request_settings)

    # ----------------------------------------------------------------
    # Example B: Google Gemini
    # ----------------------------------------------------------------
    google_api_key = "<GCP_API_KEY>"
    google_model = "gemini-1.5-flash"
    google_client = GoogleAIChatCompletion(gemini_model_id=google_model, api_key=google_api_key)
    # No default prompt settings are supplied for this adapter.
    google_adapter = SKChatCompletionAdapter(sk_client=google_client)

    # ----------------------------------------------------------------
    # Example C: Ollama (local Llama-based model)
    # ----------------------------------------------------------------
    ollama_client = OllamaChatCompletion(
        service_id="ollama",  # custom ID
        host="http://localhost:11434",
        ai_model_id="llama3.1",
    )
    request_settings = OllamaChatPromptExecutionSettings(
        # For model specific settings, specify them in the options dictionary.
        # For more information on the available options, refer to the Ollama API documentation:
        # https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
        options={
            "temperature": 0.8,
        },
    )
    ollama_adapter = SKChatCompletionAdapter(sk_client=ollama_client, default_prompt_settings=request_settings)

    # 3) Create a tool; it is handed to each adapter through the `tools`
    #    argument of the create() calls below.
    calc_tool = CalculatorTool()

    # 4) Prepare messages for a chat completion
    messages: list[LLMMessage] = [
        SystemMessage(content="You are a helpful assistant."),
        UserMessage(content="What is 2 + 2?", source="user"),
    ]

    # 5) Invoke chat completion with different adapters
    # The kernel is supplied per request through extra_create_args.
    # Azure example: the per-request "prompt_execution_settings" key is shown
    # explicitly; it carries the same object already set as the adapter default.
    azure_result = await azure_adapter.create(
        messages=messages,
        tools=[calc_tool],
        extra_create_args={"kernel": kernel, "prompt_execution_settings": azure_request_settings},
    )
    print("Azure result:", azure_result.content)

    # Google example: only the kernel is passed, no per-request settings.
    google_result = await google_adapter.create(
        messages=messages,
        tools=[calc_tool],
        extra_create_args={"kernel": kernel},
    )
    print("Google result:", google_result.content)

    # Ollama example: kernel plus the Ollama-specific settings object.
    ollama_result = await ollama_adapter.create(
        messages=messages,
        tools=[calc_tool],
        extra_create_args={"kernel": kernel, "prompt_execution_settings": request_settings},
    )
    print("Ollama result:", ollama_result.content)


# Run the async example only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())

actual_usage() → RequestUsage[source]#

property capabilities: ModelInfo#

count_tokens(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = []) → int[source]#

async create(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = [], json_output: bool | None = None, extra_create_args: Mapping[str, Any] = {}, cancellation_token: CancellationToken | None = None) → CreateResult[source]#

Create a chat completion using the Semantic Kernel client.

The extra_create_args dictionary recognizes two special keys:

“kernel” (required):
An instance of semantic_kernel.Kernel used to execute the request. If not provided, a ValueError is raised.
“prompt_execution_settings” (optional):
An instance of a PromptExecutionSettings subclass corresponding to the underlying Semantic Kernel client (e.g., AzureChatPromptExecutionSettings, GoogleAIChatPromptExecutionSettings). If not provided, the adapter’s default prompt settings will be used.

Parameters:

messages – The list of LLM messages to send.
tools – The tools that may be invoked during the chat.
json_output – Whether the model is expected to return JSON.
extra_create_args – Additional arguments to control the chat completion behavior.
cancellation_token – Token allowing cancellation of the request.

Returns:

CreateResult – The result of the chat completion.

async create_stream(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = [], json_output: bool | None = None, extra_create_args: Mapping[str, Any] = {}, cancellation_token: CancellationToken | None = None) → AsyncGenerator[str | CreateResult, None][source]#

Create a streaming chat completion using the Semantic Kernel client.

The extra_create_args dictionary recognizes two special keys:

“kernel” (required):
An instance of semantic_kernel.Kernel used to execute the request. If not provided, a ValueError is raised.
“prompt_execution_settings” (optional):
An instance of a PromptExecutionSettings subclass corresponding to the underlying Semantic Kernel client (e.g., AzureChatPromptExecutionSettings, GoogleAIChatPromptExecutionSettings). If not provided, the adapter’s default prompt settings will be used.

Parameters:

messages – The list of LLM messages to send.
tools – The tools that may be invoked during the chat.
json_output – Whether the model is expected to return JSON.
extra_create_args – Additional arguments to control the chat completion behavior.
cancellation_token – Token allowing cancellation of the request.

Yields:

Union[str, CreateResult] – Either a string chunk of the response or a CreateResult containing function calls.

property model_info: ModelInfo#

remaining_tokens(messages: Sequence[Annotated[SystemMessage | UserMessage | AssistantMessage | FunctionExecutionResultMessage, FieldInfo(annotation=NoneType, required=True, discriminator='type')]], *, tools: Sequence[Tool | ToolSchema] = []) → int[source]#

total_usage() → RequestUsage[source]#