autogen_ext.agents.video_surfer#

class VideoSurfer(name: str, model_client: ChatCompletionClient, *, tools: List[Tool | Callable[[...], Any] | Callable[[...], Awaitable[Any]]] | None = None, description: str | None = None, system_message: str | None = None)[source]#

Bases: AssistantAgent

VideoSurfer is a specialized agent designed to answer questions about a local video file.

This agent utilizes various tools to extract information from the video, such as its length, screenshots at specific timestamps, and audio transcriptions. It processes these elements to provide detailed answers to user queries.

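Available tools:

  • extract_audio
  • get_video_length
  • transcribe_audio_with_timestamps
  • get_screenshot_at
  • save_screenshot
  • transcribe_video_screenshot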

Example usage:

The following example demonstrates how to create a video-surfing agent with a model client and generate a response to a simple query about a local video called video.mp4.

import asyncio
from autogen_agentchat.ui import Console
from autogen_agentchat.conditions import TextMentionTermination
from autogen_agentchat.teams import RoundRobinGroupChat
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.agents.video_surfer import VideoSurfer

async def main() -> None:
    """
    Ask a single VideoSurfer agent questions about the local file video.mp4.

    The agent is placed in a one-member RoundRobinGroupChat that stops as soon
    as a message mentions "TERMINATE"; the conversation is streamed to the
    console.
    """
    # Define an agent
    video_agent = VideoSurfer(
        name="VideoSurfer",
        model_client=OpenAIChatCompletionClient(model="gpt-4o-2024-08-06"),
    )

    # Define termination condition. VideoSurfer's default system message tells
    # the model to reply with TERMINATE once the task has been completed.
    termination = TextMentionTermination("TERMINATE")

    # Define a team
    agent_team = RoundRobinGroupChat([video_agent], termination_condition=termination)

    # Run the team and stream messages to the console
    task = (
        "How does Adam define complex tasks in video.mp4? "
        "What concrete example of a complex task does he use? "
        "Can you save this example to disk as well?"
    )
    stream = agent_team.run_stream(task=task)
    await Console(stream)

asyncio.run(main())

The following example demonstrates how to create and use a VideoSurfer and UserProxyAgent with MagenticOneGroupChat.

import asyncio

from autogen_agentchat.ui import Console
from autogen_agentchat.teams import MagenticOneGroupChat
from autogen_agentchat.agents import UserProxyAgent
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_ext.agents.video_surfer import VideoSurfer

async def main() -> None:
    """
    Run a MagenticOneGroupChat made of a UserProxyAgent and a VideoSurfer.

    The task is handed to the team and the resulting message stream is
    rendered on the console.
    """
    # A single model client is shared by the video agent and the group chat.
    client = OpenAIChatCompletionClient(model="gpt-4o-2024-08-06")

    # Team members: the video agent and a proxy for the human user.
    surfer = VideoSurfer(name="VideoSurfer", model_client=client)
    user_proxy = UserProxyAgent(name="User")

    # Assemble the team; the user proxy is listed first, the video agent second.
    team = MagenticOneGroupChat(
        [user_proxy, surfer],
        model_client=client,
    )

    # Kick off the task and stream every message to the console.
    await Console(
        team.run_stream(
            task="Find a latest video about magentic one on youtube and extract quotes from it that make sense."
        )
    )

asyncio.run(main())
DEFAULT_DESCRIPTION = 'An agent that can answer questions about a local video.'#
DEFAULT_SYSTEM_MESSAGE = '\n    You are a helpful agent that is an expert at answering questions from a video.\n    When asked to answer a question about a video, you should:\n    1. Check if that video is available locally.\n    2. Use the transcription to find which part of the video the question is referring to.\n    3. Optionally use screenshots from those timestamps\n    4. Provide a detailed answer to the question.\n    Reply with TERMINATE when the task has been completed.\n    '#
async vs_transribe_video_screenshot(video_path: str, timestamp: float) → str[source]#

Transcribes the video screenshot at a specific timestamp.

Parameters:
  • video_path (str) – Path to the video file.

  • timestamp (float) – Timestamp to take the screenshot.

Returns:

str – Transcription of the video screenshot.