Local Search
In [1]:
Copied!
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License.
In [2]:
Copied!
import os
import pandas as pd
import tiktoken
from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
read_indexer_covariates,
read_indexer_entities,
read_indexer_relationships,
read_indexer_reports,
read_indexer_text_units,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore
import os
import pandas as pd
import tiktoken
from graphrag.query.context_builder.entity_extraction import EntityVectorStoreKey
from graphrag.query.indexer_adapters import (
read_indexer_covariates,
read_indexer_entities,
read_indexer_relationships,
read_indexer_reports,
read_indexer_text_units,
)
from graphrag.query.llm.oai.chat_openai import ChatOpenAI
from graphrag.query.llm.oai.embedding import OpenAIEmbedding
from graphrag.query.llm.oai.typing import OpenaiApiType
from graphrag.query.question_gen.local_gen import LocalQuestionGen
from graphrag.query.structured_search.local_search.mixed_context import (
LocalSearchMixedContext,
)
from graphrag.query.structured_search.local_search.search import LocalSearch
from graphrag.vector_stores.lancedb import LanceDBVectorStore
Local Search Example¶
The local search method generates answers by combining relevant data from the AI-extracted knowledge graph with text chunks from the raw documents. This method is suitable for questions that require an understanding of specific entities mentioned in the documents (e.g., "What are the healing properties of chamomile?").
Load text units and graph data tables as context for local search¶
- In this example, we first load the indexing outputs from parquet files into dataframes, then convert those dataframes into collections of data objects that align with the knowledge model.
Load tables to dataframes¶
In [3]:
Copied!
# Directory holding the indexing outputs, and the LanceDB store kept inside it.
INPUT_DIR = "./inputs/operation dulce"
LANCEDB_URI = f"{INPUT_DIR}/lancedb"

# Base names of the indexer output tables; the loading cells append ".parquet".
TEXT_UNIT_TABLE = "create_final_text_units"
COVARIATE_TABLE = "create_final_covariates"
RELATIONSHIP_TABLE = "create_final_relationships"
ENTITY_EMBEDDING_TABLE = "create_final_entities"
ENTITY_TABLE = "create_final_nodes"
COMMUNITY_REPORT_TABLE = "create_final_community_reports"

# Community level handed to the entity and community-report readers.
COMMUNITY_LEVEL = 2
Read entities¶
In [4]:
Copied!
# Read the nodes table (community and degree data) together with the entities
# table, then let the indexer adapter combine them into knowledge-model entities.
entity_df = pd.read_parquet(path=f"{INPUT_DIR}/{ENTITY_TABLE}.parquet")
entity_embedding_df = pd.read_parquet(
    path=f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet"
)
entities = read_indexer_entities(
    final_nodes=entity_df,
    final_entities=entity_embedding_df,
    community_level=COMMUNITY_LEVEL,
)

# Open the entity-description embedding collection in the LanceDB store at
# LANCEDB_URI. To connect to a remote db, specify url and port values.
description_embedding_store = LanceDBVectorStore(
    collection_name="default-entity-description"
)
description_embedding_store.connect(db_uri=LANCEDB_URI)

print(f"Entity count: {len(entity_df)}")
entity_df.head()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[4], line 5 2 entity_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_TABLE}.parquet") 3 entity_embedding_df = pd.read_parquet(f"{INPUT_DIR}/{ENTITY_EMBEDDING_TABLE}.parquet") ----> 5 entities = read_indexer_entities(entity_df, entity_embedding_df, COMMUNITY_LEVEL) 7 # load description embeddings to an in-memory lancedb vectorstore 8 # to connect to a remote db, specify url and port values. 9 description_embedding_store = LanceDBVectorStore( 10 collection_name="default-entity-description", 11 ) File ~/work/graphrag/graphrag/graphrag/query/indexer_adapters.py:154, in read_indexer_entities(final_nodes, final_entities, community_level) 149 final_df = nodes_df.merge(entities_df, on="id", how="inner").drop_duplicates( 150 subset=["id"] 151 ) 153 # read entity dataframe to knowledge model objects --> 154 return read_entities( 155 df=final_df, 156 id_col="id", 157 title_col="title", 158 type_col="type", 159 short_id_col="human_readable_id", 160 description_col="description", 161 community_col="community", 162 rank_col="degree", 163 name_embedding_col=None, 164 description_embedding_col="description_embedding", 165 text_unit_ids_col="text_unit_ids", 166 ) File ~/work/graphrag/graphrag/graphrag/query/input/loaders/dfs.py:44, in read_entities(df, id_col, short_id_col, title_col, type_col, description_col, name_embedding_col, description_embedding_col, community_col, text_unit_ids_col, rank_col, attributes_cols) 39 entities = [] 40 for idx, row in df.iterrows(): 41 entity = Entity( 42 id=to_str(row, id_col), 43 short_id=to_optional_str(row, short_id_col) if short_id_col else str(idx), ---> 44 title=to_str(row, title_col), 45 type=to_optional_str(row, type_col), 46 description=to_optional_str(row, description_col), 47 name_embedding=to_optional_list(row, name_embedding_col, item_type=float), 48 description_embedding=to_optional_list( 49 row, 
description_embedding_col, item_type=float 50 ), 51 community_ids=to_optional_list(row, community_col, item_type=str), 52 text_unit_ids=to_optional_list(row, text_unit_ids_col), 53 rank=to_optional_int(row, rank_col), 54 attributes=( 55 {col: row.get(col) for col in attributes_cols} 56 if attributes_cols 57 else None 58 ), 59 ) 60 entities.append(entity) 61 return entities File ~/work/graphrag/graphrag/graphrag/query/input/loaders/utils.py:19, in to_str(data, column_name) 17 return str(data[column_name]) 18 msg = f"Column {column_name} not found in data" ---> 19 raise ValueError(msg) ValueError: Column title not found in data
Read relationships¶
In [5]:
Copied!
# Load the relationships table and convert its rows into relationship objects.
relationship_df = pd.read_parquet(path=f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet")
relationships = read_indexer_relationships(final_relationships=relationship_df)

print(f"Relationship count: {len(relationship_df)}")
relationship_df.head()
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) Cell In[5], line 2 1 relationship_df = pd.read_parquet(f"{INPUT_DIR}/{RELATIONSHIP_TABLE}.parquet") ----> 2 relationships = read_indexer_relationships(relationship_df) 4 print(f"Relationship count: {len(relationship_df)}") 5 relationship_df.head() File ~/work/graphrag/graphrag/graphrag/query/indexer_adapters.py:66, in read_indexer_relationships(final_relationships) 64 def read_indexer_relationships(final_relationships: pd.DataFrame) -> list[Relationship]: 65 """Read in the Relationships from the raw indexing outputs.""" ---> 66 return read_relationships( 67 df=final_relationships, 68 short_id_col="human_readable_id", 69 rank_col="combined_degree", 70 description_embedding_col=None, 71 attributes_cols=None, 72 ) File ~/work/graphrag/graphrag/graphrag/query/input/loaders/dfs.py:91, in read_relationships(df, id_col, short_id_col, source_col, target_col, description_col, rank_col, description_embedding_col, weight_col, text_unit_ids_col, attributes_cols) 78 relationships = [] 79 for idx, row in df.iterrows(): 80 rel = Relationship( 81 id=to_str(row, id_col), 82 short_id=to_optional_str(row, short_id_col) if short_id_col else str(idx), 83 source=to_str(row, source_col), 84 target=to_str(row, target_col), 85 description=to_optional_str(row, description_col), 86 description_embedding=to_optional_list( 87 row, description_embedding_col, item_type=float 88 ), 89 weight=to_optional_float(row, weight_col), 90 text_unit_ids=to_optional_list(row, text_unit_ids_col, item_type=str), ---> 91 rank=to_optional_int(row, rank_col), 92 attributes=( 93 {col: row.get(col) for col in attributes_cols} 94 if attributes_cols 95 else None 96 ), 97 ) 98 relationships.append(rel) 99 return relationships File ~/work/graphrag/graphrag/graphrag/query/input/loaders/utils.py:135, in to_optional_int(data, column_name) 133 else: 134 msg = f"Column {column_name} not found in data" 
--> 135 raise ValueError(msg) 137 return int(value) ValueError: Column combined_degree not found in data
In [6]:
Copied!
# NOTE: covariates are turned off by default, because they generally need prompt tuning to be valuable
# Please see the GRAPHRAG_CLAIM_* settings
covariate_path = f"{INPUT_DIR}/{COVARIATE_TABLE}.parquet"

if os.path.exists(covariate_path):
    covariate_df = pd.read_parquet(covariate_path)
    claims = read_indexer_covariates(covariate_df)
    covariates = {"claims": claims}
else:
    # The covariates table is optional. When it was not produced during
    # indexing, fall back to None, which the context builder cell documents
    # as the value to use in that case, instead of failing on a missing file.
    claims = []
    covariates = None

print(f"Claim records: {len(claims)}")
Claim records: 156
Read community reports¶
In [7]:
Copied!
# Load the community reports. The reader also receives the nodes table and the
# community level chosen in the configuration cell.
report_df = pd.read_parquet(path=f"{INPUT_DIR}/{COMMUNITY_REPORT_TABLE}.parquet")
reports = read_indexer_reports(report_df, entity_df, COMMUNITY_LEVEL)

print(f"Report records: {len(report_df)}")
report_df.head()
Report records: 20
Out[7]:
community | full_content | level | rank | title | rank_explanation | summary | findings | full_content_json | id | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 10 | # Paranormal Military Squad at Dulce Base: Dec... | 1 | 8.5 | Paranormal Military Squad at Dulce Base: Decod... | The impact severity rating is high due to the ... | The Paranormal Military Squad, stationed at Du... | [{'explanation': 'Jordan is a central figure i... | {\n "title": "Paranormal Military Squad at ... | 1ba2d200-dd26-4693-affe-a5539d0a0e0d |
1 | 11 | # Dulce and Paranormal Military Squad Operatio... | 1 | 8.5 | Dulce and Paranormal Military Squad Operations | The impact severity rating is high due to the ... | The community centers around Dulce, a secretiv... | [{'explanation': 'Dulce is described as a top-... | {\n "title": "Dulce and Paranormal Military... | a8a530b0-ae6b-44ea-b11c-9f70d138298d |
2 | 12 | # Paranormal Military Squad and Dulce Base Ope... | 1 | 7.5 | Paranormal Military Squad and Dulce Base Opera... | The impact severity rating is relatively high ... | The community centers around the Paranormal Mi... | [{'explanation': 'Taylor is a central figure w... | {\n "title": "Paranormal Military Squad and... | 0478975b-c805-4cc1-b746-82f3e689e2f3 |
3 | 13 | # Mission Dynamics and Leadership: Cruz and Wa... | 1 | 7.5 | Mission Dynamics and Leadership: Cruz and Wash... | The impact severity rating is relatively high ... | This report explores the intricate dynamics of... | [{'explanation': 'Cruz is a central figure in ... | {\n "title": "Mission Dynamics and Leadersh... | b56f6e68-3951-4f07-8760-63700944a375 |
4 | 14 | # Dulce Base and Paranormal Military Squad: Br... | 1 | 8.5 | Dulce Base and Paranormal Military Squad: Brid... | The impact severity rating is high due to the ... | The community centers around the Dulce Base, a... | [{'explanation': 'Sam Rivera, a member of the ... | {\n "title": "Dulce Base and Paranormal Mil... | 736e7006-d050-4abb-a122-00febf3f540f |
Read text units¶
In [8]:
Copied!
# Load the text-unit table and convert its rows into text-unit objects.
text_unit_df = pd.read_parquet(path=f"{INPUT_DIR}/{TEXT_UNIT_TABLE}.parquet")
text_units = read_indexer_text_units(text_unit_df)

print(f"Text unit records: {len(text_unit_df)}")
text_unit_df.head()
Text unit records: 50
Out[8]:
id | text | n_tokens | document_ids | entity_ids | relationship_ids | covariate_ids | |
---|---|---|---|---|---|---|---|
0 | 06bdff339c02ab35c80fa49320d5da66 | # Operation: Dulce\n\n## Chapter 1\n\nThe thru... | 600 | [958fdd043f17ade63cb13570b59df295] | [b45241d70f0e43fca764df95b2b81f77, 4119fd06010... | [148fffeb994541b2b4b6dcefda7001a8, 89c08e79329... | [439081a3-bfeb-4693-968a-0f6189d8fa50, 3399e3d... |
1 | 28dc4aa41a3e99deb7f354682f3e9904 | 's authoritarian performance. _Protocols_, Jor... | 600 | [958fdd043f17ade63cb13570b59df295] | [4119fd06010c494caa07f439b333f4c5, 077d2820ae1... | [9a6f414210e14841a5b0e661aedc898d, db541b72609... | [b09594cb-d4b7-4de4-a1af-97778300eb1b, f7c5ea4... |
2 | 813db3138ef511c34be86f841f68aa8f | , rehearsing the speech for their subordinates... | 600 | [958fdd043f17ade63cb13570b59df295] | [b45241d70f0e43fca764df95b2b81f77, 077d2820ae1... | [0467928aa65e4a4fba62bdb1467e3a54, 7cc3356d38d... | [7642fe21-7cb9-4428-848b-d2e3f5ab10ca, 6fe3d6c... |
3 | d0afd106abf3a2966ff88a99eef710db | , weighing his words carefully. "Our tech is t... | 600 | [958fdd043f17ade63cb13570b59df295] | [b45241d70f0e43fca764df95b2b81f77, 4119fd06010... | [148fffeb994541b2b4b6dcefda7001a8, 89c08e79329... | [956e4c22-e343-4b5d-ad3c-d44ca3ce5fb5] |
4 | f4c7c95d7bf621c6eb73d331cbf0b608 | stepping into their exchange. The authority i... | 600 | [958fdd043f17ade63cb13570b59df295] | [077d2820ae1845bcbb1803379a3d1eae, 1fd3fa8bb5a... | [478e4c72d8fb46dd8cc9f0691c9878fd, 82b0446e7c9... | [a54eda4d-b5bf-471d-989f-370eb9aff961, 9f85274... |
In [9]:
Copied!
# Credentials and model names are taken from the environment; indexing
# os.environ raises KeyError immediately if one of them is not set.
api_key = os.environ["GRAPHRAG_API_KEY"]
llm_model = os.environ["GRAPHRAG_LLM_MODEL"]
embedding_model = os.environ["GRAPHRAG_EMBEDDING_MODEL"]

# Settings shared by the chat client and the embedding client.
# api_type: OpenaiApiType.OpenAI or OpenaiApiType.AzureOpenAI
client_settings = {
    "api_key": api_key,
    "api_type": OpenaiApiType.OpenAI,
    "max_retries": 20,
}

llm = ChatOpenAI(model=llm_model, **client_settings)

token_encoder = tiktoken.get_encoding("cl100k_base")

text_embedder = OpenAIEmbedding(
    api_base=None,
    model=embedding_model,
    deployment_name=embedding_model,
    **client_settings,
)
Create local search context builder¶
In [10]:
Copied!
# Assemble the mixed-context builder from the objects created in earlier cells.
context_builder = LocalSearchMixedContext(
    # collections loaded from the indexer output tables
    entities=entities,
    relationships=relationships,
    text_units=text_units,
    community_reports=reports,
    # if you did not run covariates during indexing, set this to None
    covariates=covariates,
    # entity description embeddings and the key type used for their ids;
    # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE
    entity_text_embeddings=description_embedding_store,
    embedding_vectorstore_key=EntityVectorStoreKey.ID,
    # embedding client and token encoder created in the setup cell
    text_embedder=text_embedder,
    token_encoder=token_encoder,
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[10], line 4 1 context_builder = LocalSearchMixedContext( 2 community_reports=reports, 3 text_units=text_units, ----> 4 entities=entities, 5 relationships=relationships, 6 # if you did not run covariates during indexing, set this to None 7 covariates=covariates, 8 entity_text_embeddings=description_embedding_store, 9 embedding_vectorstore_key=EntityVectorStoreKey.ID, # if the vectorstore uses entity title as ids, set this to EntityVectorStoreKey.TITLE 10 text_embedder=text_embedder, 11 token_encoder=token_encoder, 12 ) NameError: name 'entities' is not defined
Create local search engine¶
In [11]:
Copied!
# Settings for the context builder. text_unit_prop and community_prop split the
# context window between text units and community reports; their sum should be
# <= 1, and the remaining proportion is dedicated to entities and relationships.
local_context_params = {
    # proportion of context window dedicated to related text units
    "text_unit_prop": 0.5,
    # proportion of context window dedicated to community reports
    "community_prop": 0.1,
    # maximum number of turns to include in the conversation history
    "conversation_history_max_turns": 5,
    # if True, only include user queries in the conversation history
    "conversation_history_user_turns_only": True,
    # number of related entities to retrieve from the entity description embedding store
    "top_k_mapped_entities": 10,
    # control the number of out-of-network relationships to pull into the context window
    "top_k_relationships": 10,
    # if True, include the entity rank in the entity table in the context window
    # (default entity rank = node degree)
    "include_entity_rank": True,
    # if True, include the relationship weight in the context window
    "include_relationship_weight": True,
    # if True, include the community rank in the context window
    "include_community_rank": False,
    # if True, return a set of dataframes containing all candidate
    # entity/relationship/covariate records that could be relevant. Not all of
    # these records will be included in the context window; the "in_context"
    # column in these dataframes indicates whether a record is included.
    "return_candidate_context": False,
    # set this to EntityVectorStoreKey.TITLE if the vectorstore uses entity title as ids
    "embedding_vectorstore_key": EntityVectorStoreKey.ID,
    # maximum number of tokens to use for the context window; change this based
    # on the token limit you have on your model (if you are using a model with
    # 8k limit, a good setting could be 5000)
    "max_tokens": 12_000,
}

# Settings forwarded to the LLM call.
llm_params = {
    # change this based on the token limit you have on your model (if you are
    # using a model with 8k limit, a good setting could be 1000-1500)
    "max_tokens": 2_000,
    "temperature": 0.0,
}
In [12]:
Copied!
# Wire the LLM, the context builder and both parameter dictionaries into the
# local search engine.
search_engine = LocalSearch(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
    # free form text describing the response type and format, can be anything,
    # e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report
    response_type="multiple paragraphs",
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[12], line 3 1 search_engine = LocalSearch( 2 llm=llm, ----> 3 context_builder=context_builder, 4 token_encoder=token_encoder, 5 llm_params=llm_params, 6 context_builder_params=local_context_params, 7 response_type="multiple paragraphs", # free form text describing the response type and format, can be anything, e.g. prioritized list, single paragraph, multiple paragraphs, multiple-page report 8 ) NameError: name 'context_builder' is not defined
Run local search on sample queries¶
In [13]:
Copied!
result = await search_engine.asearch("Tell me about Agent Mercer")
print(result.response)
result = await search_engine.asearch("Tell me about Agent Mercer")
print(result.response)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[13], line 1 ----> 1 result = await search_engine.asearch("Tell me about Agent Mercer") 2 print(result.response) NameError: name 'search_engine' is not defined
In [14]:
Copied!
question = "Tell me about Dr. Jordan Hayes"
result = await search_engine.asearch(question)
print(result.response)
question = "Tell me about Dr. Jordan Hayes"
result = await search_engine.asearch(question)
print(result.response)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[14], line 2 1 question = "Tell me about Dr. Jordan Hayes" ----> 2 result = await search_engine.asearch(question) 3 print(result.response) NameError: name 'search_engine' is not defined
Inspecting the context data used to generate the response¶
In [15]:
Copied!
# Preview the entity records in the context data of the last search result.
entities_context = result.context_data["entities"]
entities_context.head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[15], line 1 ----> 1 result.context_data["entities"].head() NameError: name 'result' is not defined
In [16]:
Copied!
# Preview the relationship records in the context data of the last search result.
relationships_context = result.context_data["relationships"]
relationships_context.head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[16], line 1 ----> 1 result.context_data["relationships"].head() NameError: name 'result' is not defined
In [17]:
Copied!
# Community reports are only present in the context data for some queries.
# An expression nested inside an `if` is not the cell's last top-level
# expression, so the notebook would not render it; print it explicitly.
if "reports" in result.context_data:
    print(result.context_data["reports"].head())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[17], line 1 ----> 1 if "reports" in result.context_data: 2 result.context_data["reports"].head() NameError: name 'result' is not defined
In [18]:
Copied!
# Preview the source records in the context data of the last search result.
sources_context = result.context_data["sources"]
sources_context.head()
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[18], line 1 ----> 1 result.context_data["sources"].head() NameError: name 'result' is not defined
In [19]:
Copied!
# Claims are only present in the context data when covariates were supplied.
context_tables = result.context_data
if "claims" in context_tables:
    print(context_tables["claims"].head())
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[19], line 1 ----> 1 if "claims" in result.context_data: 2 print(result.context_data["claims"].head()) NameError: name 'result' is not defined
Question Generation¶
This function takes a list of user queries and generates the next candidate questions.
In [20]:
Copied!
# The question generator reuses the LLM, context builder, token encoder and
# parameter dictionaries that were configured for local search.
question_generator = LocalQuestionGen(
    llm=llm,
    llm_params=llm_params,
    context_builder=context_builder,
    context_builder_params=local_context_params,
    token_encoder=token_encoder,
)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[20], line 3 1 question_generator = LocalQuestionGen( 2 llm=llm, ----> 3 context_builder=context_builder, 4 token_encoder=token_encoder, 5 llm_params=llm_params, 6 context_builder_params=local_context_params, 7 ) NameError: name 'context_builder' is not defined
In [21]:
Copied!
question_history = [
"Tell me about Agent Mercer",
"What happens in Dulce military base?",
]
candidate_questions = await question_generator.agenerate(
question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)
question_history = [
"Tell me about Agent Mercer",
"What happens in Dulce military base?",
]
candidate_questions = await question_generator.agenerate(
question_history=question_history, context_data=None, question_count=5
)
print(candidate_questions.response)
--------------------------------------------------------------------------- NameError Traceback (most recent call last) Cell In[21], line 5 1 question_history = [ 2 "Tell me about Agent Mercer", 3 "What happens in Dulce military base?", 4 ] ----> 5 candidate_questions = await question_generator.agenerate( 6 question_history=question_history, context_data=None, question_count=5 7 ) 8 print(candidate_questions.response) NameError: name 'question_generator' is not defined