autogen_ext.tools.graphrag#

pydantic model GlobalContextConfig[source]#

Bases: ContextConfig

Show JSON schema
{
   "title": "GlobalContextConfig",
   "type": "object",
   "properties": {
      "max_data_tokens": {
         "default": 12000,
         "title": "Max Data Tokens",
         "type": "integer"
      },
      "use_community_summary": {
         "default": false,
         "title": "Use Community Summary",
         "type": "boolean"
      },
      "shuffle_data": {
         "default": true,
         "title": "Shuffle Data",
         "type": "boolean"
      },
      "include_community_rank": {
         "default": true,
         "title": "Include Community Rank",
         "type": "boolean"
      },
      "min_community_rank": {
         "default": 0,
         "title": "Min Community Rank",
         "type": "integer"
      },
      "community_rank_name": {
         "default": "rank",
         "title": "Community Rank Name",
         "type": "string"
      },
      "include_community_weight": {
         "default": true,
         "title": "Include Community Weight",
         "type": "boolean"
      },
      "community_weight_name": {
         "default": "occurrence weight",
         "title": "Community Weight Name",
         "type": "string"
      },
      "normalize_community_weight": {
         "default": true,
         "title": "Normalize Community Weight",
         "type": "boolean"
      }
   }
}

Fields:
  • community_rank_name (str)

  • community_weight_name (str)

  • include_community_rank (bool)

  • include_community_weight (bool)

  • max_data_tokens (int)

  • min_community_rank (int)

  • normalize_community_weight (bool)

  • shuffle_data (bool)

  • use_community_summary (bool)

field community_rank_name: str = 'rank'#
field community_weight_name: str = 'occurrence weight'#
field include_community_rank: bool = True#
field include_community_weight: bool = True#
field max_data_tokens: int = 12000#
field min_community_rank: int = 0#
field normalize_community_weight: bool = True#
field shuffle_data: bool = True#
field use_community_summary: bool = False#
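A minimal sketch of how `min_community_rank` and `community_rank_name` interact when filtering community reports, using plain dictionaries as stand-in report records (the real context builder operates on the GraphRAG community report table):

```python
# Stand-in community report records; the real context builder reads these
# from the GraphRAG community report table.
reports = [
    {"community": "a", "rank": 0},
    {"community": "b", "rank": 3},
    {"community": "c", "rank": 9},
]

community_rank_name = "rank"  # default column name for the rank attribute
min_community_rank = 3        # reports ranked below this are dropped

kept = [r for r in reports if r[community_rank_name] >= min_community_rank]
print([r["community"] for r in kept])  # ['b', 'c']
```

With the default `min_community_rank = 0`, no reports are filtered out; raising it narrows the context to higher-ranked communities.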
pydantic model GlobalDataConfig[source]#

Bases: DataConfig

Show JSON schema
{
   "title": "GlobalDataConfig",
   "type": "object",
   "properties": {
      "input_dir": {
         "title": "Input Dir",
         "type": "string"
      },
      "entity_table": {
         "default": "create_final_nodes",
         "title": "Entity Table",
         "type": "string"
      },
      "entity_embedding_table": {
         "default": "create_final_entities",
         "title": "Entity Embedding Table",
         "type": "string"
      },
      "community_level": {
         "default": 2,
         "title": "Community Level",
         "type": "integer"
      },
      "community_table": {
         "default": "create_final_communities",
         "title": "Community Table",
         "type": "string"
      },
      "community_report_table": {
         "default": "create_final_community_reports",
         "title": "Community Report Table",
         "type": "string"
      }
   },
   "required": [
      "input_dir"
   ]
}

Fields:
  • community_report_table (str)

  • community_table (str)

field community_report_table: str = 'create_final_community_reports'#
field community_table: str = 'create_final_communities'#
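The table names in a data config resolve to parquet files produced by the GraphRAG indexer inside `input_dir`. A sketch of that resolution (the directory path here is hypothetical):

```python
from pathlib import Path

input_dir = "./output/artifacts"  # hypothetical GraphRAG indexing output directory

tables = {
    "entity_table": "create_final_nodes",
    "community_table": "create_final_communities",
    "community_report_table": "create_final_community_reports",
}

# Each configured table name maps to <input_dir>/<table>.parquet on disk.
paths = {key: Path(input_dir) / f"{name}.parquet" for key, name in tables.items()}
print(paths["community_table"].name)  # create_final_communities.parquet
```

If your indexing run used customized table names, override the corresponding fields so the tool can locate the files.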
class GlobalSearchTool(token_encoder: Encoding, llm: BaseLLM, data_config: GlobalDataConfig, context_config: GlobalContextConfig = _default_context_config, mapreduce_config: MapReduceConfig = _default_mapreduce_config)[source]#

Bases: BaseTool[GlobalSearchToolArgs, GlobalSearchToolReturn]

Enables running GraphRAG global search queries as an AutoGen tool.

This tool allows you to perform semantic search over a corpus of documents using the GraphRAG framework. The search combines graph-based document relationships with semantic embeddings to find relevant information.

Note

This tool requires the graphrag extra for the autogen-ext package.

To install:

pip install -U "autogen-agentchat" "autogen-ext[graphrag]"

Before using this tool, you must complete the GraphRAG setup and indexing process:

  1. Follow the GraphRAG documentation to initialize your project and settings

  2. Configure and tune your prompts for the specific use case

  3. Run the indexing process to generate the required data files

  4. Ensure you have the settings.yaml file from the setup process

Please refer to the [GraphRAG documentation](https://microsoft.github.io/graphrag/) for detailed instructions on completing these prerequisite steps.

Example usage with AssistantAgent:

import asyncio
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.ui import Console
from autogen_ext.tools.graphrag import GlobalSearchTool
from autogen_agentchat.agents import AssistantAgent


async def main():
    # Initialize the OpenAI client
    openai_client = OpenAIChatCompletionClient(
        model="gpt-4o-mini",
        api_key="<api-key>",
    )

    # Set up global search tool
    global_tool = GlobalSearchTool.from_settings(settings_path="./settings.yaml")

    # Create assistant agent with the global search tool
    assistant_agent = AssistantAgent(
        name="search_assistant",
        tools=[global_tool],
        model_client=openai_client,
        system_message=(
            "You are a tool selector AI assistant using the GraphRAG framework. "
            "Your primary task is to determine the appropriate search tool to call based on the user's query. "
            "For broader, abstract questions requiring a comprehensive understanding of the dataset, call the 'global_search' function."
        ),
    )

    # Run a sample query
    query = "What is the overall sentiment of the community reports?"
    await Console(assistant_agent.run_stream(task=query))


if __name__ == "__main__":
    asyncio.run(main())
classmethod from_settings(settings_path: str | Path) GlobalSearchTool[source]#

Create a GlobalSearchTool instance from a GraphRAG settings file.

Parameters:

settings_path – Path to the GraphRAG settings.yaml file

Returns:

An initialized GlobalSearchTool instance

async run(args: GlobalSearchToolArgs, cancellation_token: CancellationToken) GlobalSearchToolReturn[source]#
pydantic model GlobalSearchToolArgs[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "GlobalSearchToolArgs",
   "type": "object",
   "properties": {
      "query": {
         "description": "The user query to perform global search on.",
         "title": "Query",
         "type": "string"
      }
   },
   "required": [
      "query"
   ]
}

Fields:
  • query (str)

field query: str [Required]#

The user query to perform global search on.

pydantic model GlobalSearchToolReturn[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "GlobalSearchToolReturn",
   "type": "object",
   "properties": {
      "answer": {
         "title": "Answer",
         "type": "string"
      }
   },
   "required": [
      "answer"
   ]
}

Fields:
  • answer (str)

field answer: str [Required]#
pydantic model LocalContextConfig[source]#

Bases: ContextConfig

Show JSON schema
{
   "title": "LocalContextConfig",
   "type": "object",
   "properties": {
      "max_data_tokens": {
         "default": 8000,
         "title": "Max Data Tokens",
         "type": "integer"
      },
      "text_unit_prop": {
         "default": 0.5,
         "title": "Text Unit Prop",
         "type": "number"
      },
      "community_prop": {
         "default": 0.25,
         "title": "Community Prop",
         "type": "number"
      },
      "include_entity_rank": {
         "default": true,
         "title": "Include Entity Rank",
         "type": "boolean"
      },
      "rank_description": {
         "default": "number of relationships",
         "title": "Rank Description",
         "type": "string"
      },
      "include_relationship_weight": {
         "default": true,
         "title": "Include Relationship Weight",
         "type": "boolean"
      },
      "relationship_ranking_attribute": {
         "default": "rank",
         "title": "Relationship Ranking Attribute",
         "type": "string"
      }
   }
}

Fields:
  • community_prop (float)

  • include_entity_rank (bool)

  • include_relationship_weight (bool)

  • rank_description (str)

  • relationship_ranking_attribute (str)

  • text_unit_prop (float)

field community_prop: float = 0.25#
field include_entity_rank: bool = True#
field include_relationship_weight: bool = True#
field rank_description: str = 'number of relationships'#
field relationship_ranking_attribute: str = 'rank'#
field text_unit_prop: float = 0.5#
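`text_unit_prop` and `community_prop` split the `max_data_tokens` budget between raw text units and community reports, with the remainder available for entity and relationship context. With the defaults, the split works out as follows (a simplified arithmetic sketch; the real builder also accounts for prompt overhead):

```python
max_data_tokens = 8000
text_unit_prop = 0.5    # share of the budget for text units
community_prop = 0.25   # share of the budget for community reports

text_unit_budget = int(max_data_tokens * text_unit_prop)
community_budget = int(max_data_tokens * community_prop)
remaining_budget = max_data_tokens - text_unit_budget - community_budget

print(text_unit_budget, community_budget, remaining_budget)  # 4000 2000 2000
```

Keep `text_unit_prop + community_prop` at or below 1.0 so some budget remains for entity and relationship context.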
pydantic model LocalDataConfig[source]#

Bases: DataConfig

Show JSON schema
{
   "title": "LocalDataConfig",
   "type": "object",
   "properties": {
      "input_dir": {
         "title": "Input Dir",
         "type": "string"
      },
      "entity_table": {
         "default": "create_final_nodes",
         "title": "Entity Table",
         "type": "string"
      },
      "entity_embedding_table": {
         "default": "create_final_entities",
         "title": "Entity Embedding Table",
         "type": "string"
      },
      "community_level": {
         "default": 2,
         "title": "Community Level",
         "type": "integer"
      },
      "relationship_table": {
         "default": "create_final_relationships",
         "title": "Relationship Table",
         "type": "string"
      },
      "text_unit_table": {
         "default": "create_final_text_units",
         "title": "Text Unit Table",
         "type": "string"
      }
   },
   "required": [
      "input_dir"
   ]
}

Fields:
  • relationship_table (str)

  • text_unit_table (str)

field relationship_table: str = 'create_final_relationships'#
field text_unit_table: str = 'create_final_text_units'#
class LocalSearchTool(token_encoder: Encoding, llm: BaseLLM, embedder: BaseTextEmbedding, data_config: LocalDataConfig, context_config: LocalContextConfig = _default_context_config, search_config: SearchConfig = _default_search_config)[source]#

Bases: BaseTool[LocalSearchToolArgs, LocalSearchToolReturn]

Enables running GraphRAG local search queries as an AutoGen tool.

This tool allows you to perform semantic search over a corpus of documents using the GraphRAG framework. The search combines local document context with semantic embeddings to find relevant information.

Note

This tool requires the graphrag extra for the autogen-ext package. To install:

pip install -U "autogen-agentchat" "autogen-ext[graphrag]"

Before using this tool, you must complete the GraphRAG setup and indexing process:

  1. Follow the GraphRAG documentation to initialize your project and settings

  2. Configure and tune your prompts for the specific use case

  3. Run the indexing process to generate the required data files

  4. Ensure you have the settings.yaml file from the setup process

Please refer to the [GraphRAG documentation](https://microsoft.github.io/graphrag/) for detailed instructions on completing these prerequisite steps.

Example usage with AssistantAgent:

import asyncio
from autogen_ext.models.openai import OpenAIChatCompletionClient
from autogen_agentchat.ui import Console
from autogen_ext.tools.graphrag import LocalSearchTool
from autogen_agentchat.agents import AssistantAgent


async def main():
    # Initialize the OpenAI client
    openai_client = OpenAIChatCompletionClient(
        model="gpt-4o-mini",
        api_key="<api-key>",
    )

    # Set up local search tool
    local_tool = LocalSearchTool.from_settings(settings_path="./settings.yaml")

    # Create assistant agent with the local search tool
    assistant_agent = AssistantAgent(
        name="search_assistant",
        tools=[local_tool],
        model_client=openai_client,
        system_message=(
            "You are a tool selector AI assistant using the GraphRAG framework. "
            "Your primary task is to determine the appropriate search tool to call based on the user's query. "
            "For specific, detailed information about particular entities or relationships, call the 'local_search' function."
        ),
    )

    # Run a sample query
    query = "What does the station-master say about Dr. Becher?"
    await Console(assistant_agent.run_stream(task=query))


if __name__ == "__main__":
    asyncio.run(main())
Parameters:
  • token_encoder (tiktoken.Encoding) – The tokenizer used for text encoding

  • llm (BaseLLM) – The language model to use for search

  • embedder (BaseTextEmbedding) – The text embedding model to use

data_config (LocalDataConfig) – Configuration for data source locations and settings

  • context_config (LocalContextConfig, optional) – Configuration for context building. Defaults to default config.

  • search_config (SearchConfig, optional) – Configuration for search operations. Defaults to default config.

classmethod from_settings(settings_path: str | Path) LocalSearchTool[source]#

Create a LocalSearchTool instance from a GraphRAG settings file.

Parameters:

settings_path – Path to the GraphRAG settings.yaml file

Returns:

An initialized LocalSearchTool instance

async run(args: LocalSearchToolArgs, cancellation_token: CancellationToken) LocalSearchToolReturn[source]#
pydantic model LocalSearchToolArgs[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "LocalSearchToolArgs",
   "type": "object",
   "properties": {
      "query": {
         "description": "The user query to perform local search on.",
         "title": "Query",
         "type": "string"
      }
   },
   "required": [
      "query"
   ]
}

Fields:
  • query (str)

field query: str [Required]#

The user query to perform local search on.

pydantic model LocalSearchToolReturn[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "LocalSearchToolReturn",
   "type": "object",
   "properties": {
      "answer": {
         "description": "The answer to the user query.",
         "title": "Answer",
         "type": "string"
      }
   },
   "required": [
      "answer"
   ]
}

Fields:
  • answer (str)

field answer: str [Required]#

The answer to the user query.

pydantic model MapReduceConfig[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "MapReduceConfig",
   "type": "object",
   "properties": {
      "map_max_tokens": {
         "default": 1000,
         "title": "Map Max Tokens",
         "type": "integer"
      },
      "map_temperature": {
         "default": 0.0,
         "title": "Map Temperature",
         "type": "number"
      },
      "reduce_max_tokens": {
         "default": 2000,
         "title": "Reduce Max Tokens",
         "type": "integer"
      },
      "reduce_temperature": {
         "default": 0.0,
         "title": "Reduce Temperature",
         "type": "number"
      },
      "allow_general_knowledge": {
         "default": false,
         "title": "Allow General Knowledge",
         "type": "boolean"
      },
      "json_mode": {
         "default": false,
         "title": "Json Mode",
         "type": "boolean"
      },
      "response_type": {
         "default": "multiple paragraphs",
         "title": "Response Type",
         "type": "string"
      }
   }
}

Fields:
  • allow_general_knowledge (bool)

  • json_mode (bool)

  • map_max_tokens (int)

  • map_temperature (float)

  • reduce_max_tokens (int)

  • reduce_temperature (float)

  • response_type (str)

field allow_general_knowledge: bool = False#
field json_mode: bool = False#
field map_max_tokens: int = 1000#
field map_temperature: float = 0.0#
field reduce_max_tokens: int = 2000#
field reduce_temperature: float = 0.0#
field response_type: str = 'multiple paragraphs'#
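Global search follows a map-reduce pattern: a map step answers the query against each batch of community reports, and a reduce step merges the partial answers into a final response. `map_max_tokens` and `reduce_max_tokens` cap the respective LLM outputs, and `allow_general_knowledge` controls whether the reduce step may draw on knowledge outside the indexed data. A toy illustration of the control flow, with string operations standing in for the LLM calls:

```python
# Toy map-reduce over community report batches; in real GraphRAG both steps
# are LLM calls whose outputs are capped by map_max_tokens / reduce_max_tokens.
report_batches = [
    ["report on topic A", "report on topic B"],
    ["report on topic C"],
]


def map_step(batch: list[str], query: str) -> str:
    # Stand-in for an LLM call producing a partial answer per batch.
    return f"partial answer from {len(batch)} report(s)"


def reduce_step(partials: list[str], query: str) -> str:
    # Stand-in for an LLM call merging partial answers into one response.
    return " | ".join(partials)


query = "What are the main themes?"
partial_answers = [map_step(batch, query) for batch in report_batches]
final_answer = reduce_step(partial_answers, query)
print(final_answer)  # partial answer from 2 report(s) | partial answer from 1 report(s)
```

Raising `map_max_tokens` allows richer partial answers per batch at the cost of more tokens per map call; `reduce_max_tokens` bounds the length of the merged response.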
pydantic model SearchConfig[source]#

Bases: BaseModel

Show JSON schema
{
   "title": "SearchConfig",
   "type": "object",
   "properties": {
      "max_tokens": {
         "default": 1500,
         "title": "Max Tokens",
         "type": "integer"
      },
      "temperature": {
         "default": 0.0,
         "title": "Temperature",
         "type": "number"
      },
      "response_type": {
         "default": "multiple paragraphs",
         "title": "Response Type",
         "type": "string"
      }
   }
}

Fields:
  • max_tokens (int)

  • response_type (str)

  • temperature (float)

field max_tokens: int = 1500#
field response_type: str = 'multiple paragraphs'#
field temperature: float = 0.0#
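A sketch of how the SearchConfig values would plausibly feed a local search call: `max_tokens` and `temperature` become LLM generation parameters, while `response_type` typically steers the answer format through the prompt rather than the model API. This mapping is an assumption based on common GraphRAG usage, not a guaranteed implementation detail:

```python
# SearchConfig defaults, shown as a plain dict for illustration.
search_config = {
    "max_tokens": 1500,
    "temperature": 0.0,
    "response_type": "multiple paragraphs",
}

# Generation parameters passed through to the model call.
llm_params = {
    "max_tokens": search_config["max_tokens"],
    "temperature": search_config["temperature"],
}

# response_type is interpolated into the prompt to shape the answer format.
format_instruction = f"Respond in the form of {search_config['response_type']}."
print(format_instruction)  # Respond in the form of multiple paragraphs.
```

A `temperature` of 0.0 keeps answers deterministic, which is usually what you want for retrieval-grounded responses.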