Show code cell source
# %load -r 3:18 ../_init.py
import pathlib
import sammo
from sammo.runners import OpenAIChat
from sammo.base import Template, EvaluationScore
from sammo.components import Output, GenerateText, ForEach, Union
from sammo.extractors import ExtractRegex
from sammo.data import DataTable
import json
import requests
import os
# Fail fast with a clear message when the API key is missing; the runner
# created later in this notebook reads it from os.environ.
if "OPENAI_API_KEY" not in os.environ:
    raise ValueError("Please set the environment variable 'OPENAI_API_KEY'.")
_ = sammo.setup_logger("WARNING") # we're only interested in warnings for now
Structured Outputs#
There are two ways in which models offer parseable JSON objects:
By setting a flag that ensures that the output is some JSON object
By specifying the exact JSON schema that the output needs to adhere to
Option 2 is generally preferable; the first option will likely disappear in future API versions.
Setting a flag#
For this, simply pass `json_mode=True` to `GenerateText`.
# Chat runner for the gpt-4o model. The API key comes from the environment,
# and the cache file defaults to "cache.tsv" unless CACHE_FILE is set.
runner = OpenAIChat(
    model_id="gpt-4o",
    api_config={"api_key": os.environ["OPENAI_API_KEY"]},
    cache=os.getenv("CACHE_FILE", "cache.tsv"),
    timeout=30,
)

# json_mode=True only asks for *some* JSON object; the exact shape is left
# to the model.
Output(
    GenerateText(
        "Generate a list of 10 full names in JSON format.",
        json_mode=True,
    )
).run(runner)
+---------+---------------------------------------------------------+
| input | output |
+=========+=========================================================+
| None | { "names": [ "Emma Johnson", "Liam Smith", |
| | "Olivia Brown", "Noah Davis", "Ava Wilson", |
| | "Elijah Martinez", "Sophia Anderson", "Lucas |
| | Taylor", "Isabella Thomas", "Mason Moore" ] } |
+---------+---------------------------------------------------------+
Constants: None
What if we actually wanted first and last names as separate fields? We could provide the model with an example output, or:
Specifying a JSON schema#
Say we want something like
# Sample of the desired output shape: a "names" list whose entries keep
# first and last name in separate fields.
example = {
    "names": [
        {"first": "John", "last": "Smith"},
    ],
}
While you can write a schema manually, SAMMO provides a convenience function that works in many cases.
# Derive a JSON schema from the example object instead of writing one by hand.
schema = runner.guess_json_schema(example)
print(schema)  # display the inferred schema
{
"type": "object",
"properties": {
"names": {
"type": "array",
"items": {
"type": "object",
"properties": {
"first": {
"type": "string"
},
"last": {
"type": "string"
}
},
"required": [
"first",
"last"
],
"additionalProperties": false
}
}
},
"required": [
"names"
],
"additionalProperties": false
}
That would have been quite some work! Let's pass this to `GenerateText`.
# Same prompt as before, but json_mode now carries the schema so the output
# has to follow the first/last structure.
Output(
    GenerateText(
        "Generate a list of 10 full names in JSON format.",
        json_mode=schema,
    )
).run(runner)
+---------+--------------------------------------------------------------+
| input | output |
+=========+==============================================================+
| None | {"names":[{"first":"Liam","last":"Johnson"},{"first":"Emma", |
| | "last":"Williams"},{"first":"Noah","last":"Brown"},{"first": |
| | "Olivia","last":"Jones"},{"first":"Ava","last":"Garcia"},{"f |
| | irst":"Sophia","last":"Martinez"},{"first":"Isabella","last" |
| | :"Davis"},{"first":"Mia","last":"Rodriguez"},{"first":"Charl |
| | otte","last":"Hernandez"},{"first":"Amelia","last":"Lopez"}] |
| | } |
+---------+--------------------------------------------------------------+
Constants: None