Source code for autogen_core.model_context._token_limited_chat_completion_context

from typing import List

from pydantic import BaseModel
from typing_extensions import Self

from .._component_config import Component, ComponentModel
from ..models import ChatCompletionClient, FunctionExecutionResultMessage, LLMMessage
from ..tools import ToolSchema
from ._chat_completion_context import ChatCompletionContext


class TokenLimitedChatCompletionContextConfig(BaseModel):
    model_client: ComponentModel
    token_limit: int | None = None
    tool_schema: List[ToolSchema] | None = None
    initial_messages: List[LLMMessage] | None = None


class TokenLimitedChatCompletionContext(ChatCompletionContext, Component[TokenLimitedChatCompletionContextConfig]):
    """(Experimental) A token-based chat completion context that maintains a view of the context
    up to a token limit.

    .. note::

        Added in v0.4.10. This is an experimental component and may change in the future.

    Args:
        model_client (ChatCompletionClient): The model client to use for token counting.
            The model client must implement the :meth:`~autogen_core.models.ChatCompletionClient.count_tokens`
            and :meth:`~autogen_core.models.ChatCompletionClient.remaining_tokens` methods.
        token_limit (int | None): The maximum number of tokens to keep in the context,
            measured with the :meth:`~autogen_core.models.ChatCompletionClient.count_tokens` method.
            If None, the context is limited by the model client via the
            :meth:`~autogen_core.models.ChatCompletionClient.remaining_tokens` method.
        tool_schema (List[ToolSchema] | None): A list of tool schema to use in the context.
        initial_messages (List[LLMMessage] | None): A list of initial messages to include in the context.
    """

    component_config_schema = TokenLimitedChatCompletionContextConfig
    component_provider_override = "autogen_core.model_context.TokenLimitedChatCompletionContext"

    def __init__(
        self,
        model_client: ChatCompletionClient,
        *,
        token_limit: int | None = None,
        tool_schema: List[ToolSchema] | None = None,
        initial_messages: List[LLMMessage] | None = None,
    ) -> None:
        super().__init__(initial_messages)
        if token_limit is not None and token_limit <= 0:
            raise ValueError("token_limit must be greater than 0.")
        self._token_limit = token_limit
        self._model_client = model_client
        self._tool_schema = tool_schema or []

    async def get_messages(self) -> List[LLMMessage]:
        """Get at most `token_limit` tokens of recent messages. If no token limit is
        provided, return as many messages as fit within the model client's remaining tokens."""
        messages = list(self._messages)
        if self._token_limit is None:
            remaining_tokens = self._model_client.remaining_tokens(messages, tools=self._tool_schema)
            while remaining_tokens < 0 and len(messages) > 0:
                middle_index = len(messages) // 2
                messages.pop(middle_index)
                remaining_tokens = self._model_client.remaining_tokens(messages, tools=self._tool_schema)
        else:
            token_count = self._model_client.count_tokens(messages, tools=self._tool_schema)
            while token_count > self._token_limit and len(messages) > 0:
                middle_index = len(messages) // 2
                messages.pop(middle_index)
                token_count = self._model_client.count_tokens(messages, tools=self._tool_schema)
        if messages and isinstance(messages[0], FunctionExecutionResultMessage):
            # If the first remaining message is a function execution result,
            # drop it: it no longer has a preceding tool call message.
            messages = messages[1:]
        return messages

    def _to_config(self) -> TokenLimitedChatCompletionContextConfig:
        return TokenLimitedChatCompletionContextConfig(
            model_client=self._model_client.dump_component(),
            token_limit=self._token_limit,
            tool_schema=self._tool_schema,
            initial_messages=self._initial_messages,
        )

    @classmethod
    def _from_config(cls, config: TokenLimitedChatCompletionContextConfig) -> Self:
        return cls(
            model_client=ChatCompletionClient.load_component(config.model_client),
            token_limit=config.token_limit,
            tool_schema=config.tool_schema,
            initial_messages=config.initial_messages,
        )
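
A minimal usage sketch, assuming OpenAIChatCompletionClient from the separate autogen_ext package as the token-counting client (any ChatCompletionClient that implements count_tokens and remaining_tokens works; the model name and token limit below are illustrative only):

import asyncio

from autogen_core.model_context import TokenLimitedChatCompletionContext
from autogen_core.models import UserMessage
from autogen_ext.models.openai import OpenAIChatCompletionClient


async def main() -> None:
    model_client = OpenAIChatCompletionClient(model="gpt-4o")
    # Keep the history under ~100 tokens; get_messages() prunes messages from the
    # middle of the history until the count fits the limit.
    context = TokenLimitedChatCompletionContext(model_client, token_limit=100)
    await context.add_message(UserMessage(content="Hello, how are you?", source="user"))
    print(await context.get_messages())


asyncio.run(main())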
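
Because the class is a Component, a configured context can be round-tripped through its declarative config; a sketch, assuming the wrapped model client also supports dump_component():

# Serialize the context (including its model client) to a ComponentModel,
# then restore an equivalent context from that config.
config = context.dump_component()
restored = TokenLimitedChatCompletionContext.load_component(config)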