autogen_ext.agents#
- class autogen_ext.agents.MultimodalWebSurfer(name: str, model_client: ChatCompletionClient, description: str = 'A helpful assistant with access to a web browser. Ask them to perform web searches, open pages, and interact with content (e.g., clicking links, scrolling the viewport, etc., filling in form fields, etc.) It can also summarize the entire page, or answer questions based on the content of the page. It can also be asked to sleep and wait for pages to load, in cases where the pages seem to be taking a while to load.', headless: bool = True, browser_channel: str | None = None, browser_data_dir: str | None = None, start_page: str | None = None, downloads_folder: str | None = None, debug_dir: str | None = '/home/runner/work/autogen/autogen/python/packages/autogen-core', to_save_screenshots: bool = False, animate_actions: bool = False, use_ocr: bool = True, to_resize_viewport: bool = True, playwright: Playwright | None = None, context: BrowserContext | None = None)[source]#
Bases:
BaseChatAgent
(In preview) A multimodal agent that acts as a web surfer, able to search the web and visit web pages.
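A minimal usage sketch. This is not the library's official example: the model client and its import path are assumptions based on the preview packages, and a multimodal-capable model plus an installed Playwright browser are required (the agent drives the page via Playwright, per the playwright/context parameters above).

import asyncio

from autogen_agentchat.messages import TextMessage
from autogen_core.base import CancellationToken
from autogen_ext.agents import MultimodalWebSurfer
from autogen_ext.models import OpenAIChatCompletionClient  # assumed import path for the preview model client


async def main() -> None:
    # A multimodal-capable model is needed so the agent can reason over page screenshots.
    model_client = OpenAIChatCompletionClient(model="gpt-4o")
    surfer = MultimodalWebSurfer(name="web_surfer", model_client=model_client, headless=True)

    # Ask the surfer to perform a search and summarize what it finds.
    response = await surfer.on_messages(
        [TextMessage(source="user", content="Search for 'AutoGen AgentChat' and summarize the first result.")],
        CancellationToken(),
    )
    print(response.chat_message.content)

    # Reset the browser state before an unrelated task.
    await surfer.on_reset(CancellationToken())


asyncio.run(main())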
- DEFAULT_DESCRIPTION = 'A helpful assistant with access to a web browser. Ask them to perform web searches, open pages, and interact with content (e.g., clicking links, scrolling the viewport, etc., filling in form fields, etc.) It can also summarize the entire page, or answer questions based on the content of the page. It can also be asked to sleep and wait for pages to load, in cases where the pages seem to be taking a while to load.'#
- DEFAULT_START_PAGE = 'https://www.bing.com/'#
- async on_messages(messages: Sequence[TextMessage | MultiModalMessage | StopMessage | HandoffMessage], cancellation_token: CancellationToken) Response [source]#
Handles incoming messages and returns a response.
- async on_reset(cancellation_token: CancellationToken) None [source]#
Resets the agent to its initialization state.
- property produced_message_types: List[type[TextMessage | MultiModalMessage | StopMessage | HandoffMessage]]#
The types of messages that the agent produces.
- class autogen_ext.agents.OpenAIAssistantAgent(name: str, description: str, client: AsyncOpenAI, model: str, instructions: str, tools: Iterable[Literal['code_interpreter', 'file_search'] | Tool | Callable[[...], Any] | Callable[[...], Awaitable[Any]]] | None = None, assistant_id: str | None = None, thread_id: str | None = None, metadata: object | None = None, response_format: Literal['auto'] | ResponseFormatText | ResponseFormatJSONObject | ResponseFormatJSONSchema | None = None, temperature: float | None = None, tool_resources: ToolResources | None = None, top_p: float | None = None)[source]#
Bases:
BaseChatAgent
An agent implementation that uses the OpenAI Assistant API to generate responses.
This agent leverages the OpenAI Assistant API to create AI assistants with capabilities like:
- Code interpretation and execution
- File handling and search
- Custom function calling
- Multi-turn conversations
The agent maintains a thread of conversation and can use various tools including:
- Code interpreter: For executing code and working with files
- File search: For searching through uploaded documents
- Custom functions: For extending capabilities with user-defined tools
Note
The agent deletes all messages in the thread when on_reset() is called.
Key Features:
- Supports multiple file formats including code, documents, images
- Can handle up to 128 tools per assistant
- Maintains conversation context in threads
- Supports file uploads for code interpreter and search
- Vector store integration for efficient file search
- Automatic file parsing and embedding
Example
from openai import AsyncClient
from autogen_core.base import CancellationToken
from autogen_ext.agents import OpenAIAssistantAgent
from autogen_agentchat.messages import TextMessage

# Create an OpenAI client
client = AsyncClient(api_key="your-api-key", base_url="your-base-url")

# Create a cancellation token for the calls below
cancellation_token = CancellationToken()

# Create an assistant with code interpreter
assistant = OpenAIAssistantAgent(
    name="Python Helper",
    description="Helps with Python programming",
    client=client,
    model="gpt-4",
    instructions="You are a helpful Python programming assistant.",
    tools=["code_interpreter"],
)

# Upload files for the assistant to use
await assistant.on_upload_for_code_interpreter("data.csv", cancellation_token)

# Get response from the assistant
response = await assistant.on_messages(
    [TextMessage(source="user", content="Analyze the data in data.csv")], cancellation_token
)

# Clean up resources
await assistant.delete_uploaded_files(cancellation_token)
await assistant.delete_assistant(cancellation_token)
- Parameters:
name (str) – Name of the assistant
description (str) – Description of the assistant’s purpose
client (AsyncClient) – OpenAI API client instance
model (str) – Model to use (e.g. “gpt-4”)
instructions (str) – System instructions for the assistant
tools (Optional[Iterable[Union[Literal["code_interpreter", "file_search"], Tool | Callable[..., Any] | Callable[..., Awaitable[Any]]]]]) – Tools the assistant can use
assistant_id (Optional[str]) – ID of existing assistant to use
thread_id (Optional[str]) – ID of existing thread to use
metadata (Optional[object]) – Additional metadata for the assistant
response_format (Optional[AssistantResponseFormatOptionParam]) – Response format settings
temperature (Optional[float]) – Temperature for response generation
tool_resources (Optional[ToolResources]) – Additional tool configuration
top_p (Optional[float]) – Top p sampling parameter
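Because the tools parameter also accepts plain callables (see the type above), a user-defined function can be registered as a custom function tool. A hedged sketch, reusing the client from the example above; the function name and wrapping behavior here are assumptions for illustration, not part of the documented API.

def lookup_order_status(order_id: str) -> str:
    """Hypothetical helper: return a canned status for the given order ID."""
    return f"Order {order_id} is in transit."


assistant_with_tools = OpenAIAssistantAgent(
    name="Support Helper",
    description="Answers order-status questions",
    client=client,  # AsyncClient from the example above
    model="gpt-4",
    instructions="Use the lookup_order_status tool to answer questions about orders.",
    tools=[lookup_order_status],
)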
- async delete_assistant(cancellation_token: CancellationToken) None [source]#
Delete the assistant if it was created by this instance.
- async delete_uploaded_files(cancellation_token: CancellationToken) None [source]#
Delete all files that were uploaded by this agent instance.
- async delete_vector_store(cancellation_token: CancellationToken) None [source]#
Delete the vector store if it was created by this instance.
- async handle_text_message(content: str, cancellation_token: CancellationToken) None [source]#
Handle regular text messages by adding them to the thread.
- property messages: AsyncMessages#
- async on_messages(messages: Sequence[TextMessage | MultiModalMessage | StopMessage | HandoffMessage], cancellation_token: CancellationToken) Response [source]#
Handle incoming messages and return a response.
- async on_messages_stream(messages: Sequence[TextMessage | MultiModalMessage | StopMessage | HandoffMessage], cancellation_token: CancellationToken) AsyncGenerator[TextMessage | MultiModalMessage | StopMessage | HandoffMessage | ToolCallMessage | ToolCallResultMessage | Response, None] [source]#
Handle incoming messages and yield intermediate messages (such as tool calls and their results) as they are produced, ending with the final Response.
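A hedged sketch of consuming the stream. The assistant and cancellation_token are assumed to be set up as in the example above, and the import path for Response is an assumption that may differ by release.

from autogen_agentchat.base import Response  # assumed import path
from autogen_agentchat.messages import TextMessage


async def stream_once() -> None:
    async for item in assistant.on_messages_stream(
        [TextMessage(source="user", content="Plot a histogram of the data in data.csv")],
        cancellation_token,
    ):
        if isinstance(item, Response):
            # The final item is the Response; earlier items are intermediate
            # messages such as tool calls and tool call results.
            print("final:", item.chat_message.content)
        else:
            print("event:", type(item).__name__)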
- async on_reset(cancellation_token: CancellationToken) None [source]#
Handle reset command by deleting all messages in the thread.
- async on_upload_for_code_interpreter(file_paths: str | Iterable[str], cancellation_token: CancellationToken) None [source]#
Handle file uploads for the code interpreter.
- async on_upload_for_file_search(file_paths: str | Iterable[str], cancellation_token: CancellationToken) None [source]#
Handle file uploads for file search.
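A hedged sketch of the file_search workflow, assuming an assistant created with tools=["file_search"] and a hypothetical local file report.pdf. Per the key features and delete_vector_store() above, uploads for search are indexed into a vector store created by this instance.

from autogen_agentchat.messages import TextMessage
from autogen_core.base import CancellationToken


async def search_documents() -> None:
    cancellation_token = CancellationToken()

    # Upload a document; the agent indexes it into a vector store for retrieval.
    await assistant.on_upload_for_file_search(["report.pdf"], cancellation_token)  # hypothetical file

    response = await assistant.on_messages(
        [TextMessage(source="user", content="What does report.pdf conclude?")],
        cancellation_token,
    )
    print(response.chat_message.content)

    # Clean up the vector store and uploaded files created by this instance.
    await assistant.delete_vector_store(cancellation_token)
    await assistant.delete_uploaded_files(cancellation_token)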
- property produced_message_types: List[type[TextMessage | MultiModalMessage | StopMessage | HandoffMessage]]#
The types of messages that the assistant agent produces.
- property runs: AsyncRuns#
- property threads: AsyncThreads#