Chat with PDF in Azure#

This is a simple flow that allows you to ask questions about the content of a PDF file and get answers. You run the flow with a URL to a PDF file and a question as arguments. Once launched, it downloads the PDF and builds an index of the content. Then, when you ask a question, it looks up the index to retrieve the relevant content and posts the question together with that content to an OpenAI chat model (gpt-3.5-turbo or GPT-4) to get an answer.
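
For intuition, here is a minimal standalone sketch of the same retrieve-then-chat pattern, using the openai and pypdf packages directly. This is not the actual flow code; the deployment names, file path, and chunking below are illustrative assumptions.

import urllib.request

import numpy as np
from openai import AzureOpenAI
from pypdf import PdfReader

# Assumes AZURE_OPENAI_ENDPOINT and AZURE_OPENAI_API_KEY are set in the environment.
client = AzureOpenAI(api_version="2024-02-01")


def build_index(pdf_url: str, chunk_size: int = 1024):
    # Download the PDF and split its text into fixed-size chunks.
    path, _ = urllib.request.urlretrieve(pdf_url, "paper.pdf")
    text = "".join(page.extract_text() or "" for page in PdfReader(path).pages)
    chunks = [text[i : i + chunk_size] for i in range(0, len(text), chunk_size)]
    # Embed every chunk (real code would batch these requests).
    resp = client.embeddings.create(model="text-embedding-ada-002", input=chunks)
    return chunks, np.array([d.embedding for d in resp.data])


def answer(question: str, chunks, embeddings, top_k: int = 3) -> str:
    # Embed the question and retrieve the most similar chunks (the dot product
    # approximates cosine similarity for normalized ada-002 embeddings).
    q_resp = client.embeddings.create(model="text-embedding-ada-002", input=[question])
    q = np.array(q_resp.data[0].embedding)
    top = np.argsort(embeddings @ q)[-top_k:]
    context = "\n".join(chunks[i] for i in top)
    # Post the question together with the retrieved context to the chat model.
    messages = [{"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"}]
    completion = client.chat.completions.create(model="gpt-35-turbo", messages=messages)
    return completion.choices[0].message.content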

0. Install dependencies#

%pip install -r requirements.txt

1. Connect to Azure Machine Learning Workspace#

from azure.identity import DefaultAzureCredential, InteractiveBrowserCredential

try:
    credential = DefaultAzureCredential()
    # Check if given credential can get token successfully.
    credential.get_token("https://management.azure.com/.default")
except Exception as ex:
    # Fall back to InteractiveBrowserCredential in case DefaultAzureCredential does not work
    credential = InteractiveBrowserCredential()

1.1 Get familiar with the primary interface - PFClient#

import promptflow.azure as azure

# Get a handle to workspace
pf = azure.PFClient.from_config(credential=credential)
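
from_config reads the workspace details from a config.json file in the working directory (you can download it from the workspace portal). Alternatively, you can pass them explicitly; a minimal sketch with placeholder values:

# Construct the client from explicit workspace details (placeholder values).
pf = azure.PFClient(
    credential=credential,
    subscription_id="<SUBSCRIPTION_ID>",
    resource_group_name="<RESOURCE_GROUP_NAME>",
    workspace_name="<WORKSPACE_NAME>",
)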

1.2 Create necessary connections#

A connection in prompt flow manages the settings of your application's behavior, including how to talk to different services (Azure OpenAI, for example).

Prepare your Azure OpenAI resource by following this instruction and get your api_key if you don't have one.

Please go to the workspace portal, click Prompt flow -> Connections -> Create, then follow the instructions to create your own connection. Learn more about connections.

conn_name = "open_ai_connection"

# TODO: integrate with the azure.ai SDK.
# Currently connections can only be created in the Azure ML Studio UI.
# raise Exception(f"Please create {conn_name} connection in Azure ML Studio.")
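
Once the connection has been created in the Studio UI, you can sanity-check it from code. A minimal sketch, assuming the Azure PFClient exposes read-only connection operations:

# Verify the connection exists in the workspace before running the flow.
try:
    conn = pf.connections.get(name=conn_name)
    print(f"Found connection: {conn.name}")
except Exception:
    print(f"Please create the {conn_name} connection in Azure ML Studio first.")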

2. Run the flow with a 2K context setting#

flow_path = "."
data_path = "./data/bert-paper-qna-3-line.jsonl"

config_2k_context = {
    "EMBEDDING_MODEL_DEPLOYMENT_NAME": "text-embedding-ada-002",
    "CHAT_MODEL_DEPLOYMENT_NAME": "gpt-35-turbo",
    "PROMPT_TOKEN_LIMIT": 2000,
    "MAX_COMPLETION_TOKENS": 256,
    "VERBOSE": True,
    "CHUNK_SIZE": 1024,
    "CHUNK_OVERLAP": 32,
}

column_mapping = {
    "question": "${data.question}",
    "pdf_url": "${data.pdf_url}",
    "chat_history": "${data.chat_history}",
    "config": config_2k_context,
}

run_2k_context = pf.run(
    flow=flow_path,
    data=data_path,
    column_mapping=column_mapping,
    display_name="chat_with_pdf_2k_context",
    tags={"chat_with_pdf": "", "1st_round": ""},
)
pf.stream(run_2k_context)
print(run_2k_context)
detail = pf.get_details(run_2k_context)

detail
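
get_details returns a pandas DataFrame, so the usual pandas tooling applies; for example, to keep a local copy of the run's inputs and outputs (the file name below is arbitrary):

# Save the run's row-level inputs and outputs for later inspection.
detail.to_csv("run_2k_context_details.csv", index=False)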

3. Evaluate the “groundedness”#

The eval-groundedness flow uses a ChatGPT/GPT-4 model to grade the answers generated by the chat-with-pdf flow.

eval_groundedness_flow_path = "../../evaluation/eval-groundedness/"
eval_groundedness_2k_context = pf.run(
    flow=eval_groundedness_flow_path,
    run=run_2k_context,
    column_mapping={
        "question": "${run.inputs.question}",
        "answer": "${run.outputs.answer}",
        "context": "${run.outputs.context}",
    },
    display_name="eval_groundedness_2k_context",
)
pf.stream(eval_groundedness_2k_context)

print(eval_groundedness_2k_context)
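
You can also pull the row-level grades and the aggregated score for this evaluation run (a sketch; pf.get_metrics returns whatever metrics the evaluation flow logged):

# Row-level grades and aggregated metrics of the evaluation run.
pf.get_details(eval_groundedness_2k_context)
pf.get_metrics(eval_groundedness_2k_context)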

4. Try a different configuration and evaluate again - experimentation#

flow_path = "."
data_path = "./data/bert-paper-qna-3-line.jsonl"

config_3k_context = {
    "EMBEDDING_MODEL_DEPLOYMENT_NAME": "text-embedding-ada-002",
    "CHAT_MODEL_DEPLOYMENT_NAME": "gpt-35-turbo",
    "PROMPT_TOKEN_LIMIT": 3000,  # different from 2k context
    "MAX_COMPLETION_TOKENS": 256,
    "VERBOSE": True,
    "CHUNK_SIZE": 1024,
    "CHUNK_OVERLAP": 32,
}

column_mapping = {
    "question": "${data.question}",
    "pdf_url": "${data.pdf_url}",
    "chat_history": "${data.chat_history}",
    "config": config_3k_context,
}
run_3k_context = pf.run(
    flow=flow_path,
    data=data_path,
    column_mapping=column_mapping,
    display_name="chat_with_pdf_3k_context",
    tags={"chat_with_pdf": "", "2nd_round": ""},
)
pf.stream(run_3k_context)
print(run_3k_context)
detail = pf.get_details(run_3k_context)

detail

eval_groundedness_3k_context = pf.run(
    flow=eval_groundedness_flow_path,
    run=run_3k_context,
    column_mapping={
        "question": "${run.inputs.question}",
        "answer": "${run.outputs.answer}",
        "context": "${run.outputs.context}",
    },
    display_name="eval_groundedness_3k_context",
)
pf.stream(eval_groundedness_3k_context)

print(eval_groundedness_3k_context)

pf.get_details(eval_groundedness_3k_context)

pf.visualize([eval_groundedness_2k_context, eval_groundedness_3k_context])
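
To compare the two configurations numerically as well, you can print the aggregated metrics side by side (a sketch; assumes the evaluation flow logs its groundedness score as a run metric):

# Compare the aggregated groundedness metrics of the two runs.
print(pf.get_metrics(eval_groundedness_2k_context))
print(pf.get_metrics(eval_groundedness_3k_context))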