Evaluate with langchain’s evaluator#
Learning Objectives - Upon completing this tutorial, you should be able to:
- Convert LangChain criteria evaluator applications to flex flow.
- Use CustomConnection to store secrets.
0. Install dependent packages#
%%capture --no-stderr
# Install the packages listed in ./requirements.txt. The %%capture cell magic
# hides the cell's output; --no-stderr leaves stderr uncaptured so it still shows.
%pip install -r ./requirements.txt
1. Trace your langchain evaluator with prompt flow#
Initialize a pf client#
from promptflow.client import PFClient
# Client object that the later cells use to manage connections and runs.
pf = PFClient()
Create a custom connection to protect your API key#
You can protect your API key by storing it in a custom connection's secrets.
import os
from dotenv import load_dotenv
from promptflow.entities import CustomConnection
# Name under which the custom connection is looked up and, if needed, created.
conn_name = "my_llm_connection"

try:
    # Reuse the connection if a lookup by name succeeds.
    conn = pf.connections.get(name=conn_name)
    print("using existing connection")
except Exception:
    # The lookup failed, so fall back to creating the connection.
    # `Exception` is caught instead of using a bare `except:` so that
    # KeyboardInterrupt and SystemExit still propagate to the notebook.
    if "AZURE_OPENAI_API_KEY" not in os.environ:
        # load environment variables from .env file
        load_dotenv()

    # put API key in secrets
    # NOTE: a missing AZURE_OPENAI_ENDPOINT / AZURE_OPENAI_API_KEY raises
    # KeyError here, naming the variable that still needs to be set.
    connection = CustomConnection(
        name=conn_name,
        configs={
            "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
        },
        secrets={
            # store API key
            # "anthropic_api_key": "<your-api-key>",
            "openai_api_key": os.environ["AZURE_OPENAI_API_KEY"],
        },
    )
    # Create the connection, note that all secret values will be scrubbed in the returned result
    conn = pf.connections.create_or_update(connection)
    print("successfully created connection")

# Show whichever connection object was obtained above.
print(conn)
Test the evaluator with trace#
from eval_conciseness import LangChainEvaluator
# Instantiate the evaluator, handing it the connection object obtained above.
evaluator = LangChainEvaluator(custom_connection=conn)

# Score one hand-written sample: the question goes in as `input`, the
# deliberately wordy answer as `prediction`.
result = evaluator(
    input="What's 2+2?",
    prediction="What's 2+2? That's an elementary question. The answer you're looking for is that two and two is four.",
)

print(result)
2. Batch run the evaluator with flow yaml#
Create a flow.flex.yaml file that defines a flow whose entry points to the Python function we defined.
# Path to the JSONL dataset used for the batch run.
data = "./data.jsonl"

# Start a batch run of the flow described by flow.flex.yaml over that dataset.
base_run = pf.run(
    flow="./flow.flex.yaml",
    data=data,
    # Feed each data column to the call argument of the same name.
    column_mapping={
        "prediction": "${data.prediction}",
        "input": "${data.input}",
    },
    # The custom connection is handed to the flow's init by name.
    init={"custom_connection": "my_llm_connection"},
    stream=True,
)
# Fetch the details of the batch run. The bare `details.head(10)` expression
# is what a notebook displays when it is the last expression of a cell
# (presumably the first 10 rows of a pandas DataFrame -- confirm).
details = pf.get_details(base_run)
details.head(10)
# Visualize the batch run through the prompt flow client.
pf.visualize([base_run])