# %load -r :19 ../_init.py
import pathlib
import sammo
from sammo.runners import OpenAIChat
from sammo.base import Template, EvaluationScore
from sammo.components import Output, GenerateText, ForEach, Union
from sammo.extractors import ExtractRegex
from sammo.data import DataTable
import json
import requests
API_CONFIG_FILE = pathlib.Path().cwd().parent.parent / "config" / "personal.openai"
API_CONFIG = ""
if API_CONFIG_FILE.exists():
    API_CONFIG = API_CONFIG_FILE
if not API_CONFIG:
    raise ValueError('Please set API_CONFIG to {"api_key": "YOUR_KEY"}')
_ = sammo.setup_logger("WARNING") # we're only interested in warnings for now
Rate Limiting
Many APIs have rate limits, often expressed as a maximum number of requests within a certain time period or as a total cost.

You have three options to specify rate limits in a Runner (in increasing order of flexibility):

1. Specify a number for the rate_limit parameter. This enforces a requests-per-second limit equal to that number.
2. Specify a list of AtMost objects that are combined in a logical AND fashion.
3. Pass an instance of Throttler (or a subclass of it). This allows you to fine-tune some settings, e.g., how costs are calculated; see the sketch right after this list.
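Options 1 and 2 are demonstrated in the sections below. For option 3, here is a minimal sketch; it assumes Throttler is importable from sammo.throttler alongside AtMost and that its constructor accepts the list of limits (consult the module for the exact signature and the cost-related settings):

from sammo.throttler import AtMost, Throttler

# Hedged sketch for option 3: construct a Throttler with explicit limits.
# We assume the constructor takes a list of AtMost limits; further settings
# (e.g., how costs are computed) live on Throttler itself.
throttler = Throttler([AtMost(10, "calls", 60)])  # at most 10 calls per minute
runner = OpenAIChat(model_id="gpt-3.5-turbo-16k", api_config=API_CONFIG, rate_limit=throttler)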
Simple rate limit (qps)
runner = OpenAIChat(model_id="gpt-3.5-turbo-16k", api_config=API_CONFIG, rate_limit=1)
Output(GenerateText(Template("Output as a latin numeral: {{input}}"))).run(
runner, list(range(1,6))
)
minibatches[###################################################################################]5/5[00:04<00:00, 1.13it/s]
+---------+----------+
| input | output |
+=========+==========+
| 1 | I |
+---------+----------+
| 2 | II |
+---------+----------+
| 3 | III |
+---------+----------+
| 4 | IV |
+---------+----------+
| 5 | V |
+---------+----------+
Constants: None
As specified, SAMMO issued exactly one prompt request per second.
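Passing a number is shorthand; based on the description above, rate_limit=1 should be equivalent to a single one-call-per-second limit. The AtMost form below is an assumption drawn from that description, not confirmed library internals:

from sammo.throttler import AtMost

# Assumed equivalence: at most 1 call per 1-second window.
runner = OpenAIChat(
    model_id="gpt-3.5-turbo-16k",
    api_config=API_CONFIG,
    rate_limit=AtMost(1, "calls", 1),
)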
Advanced rate limits
Let's say we want to make sure we never have more than one running request.
from sammo.throttler import AtMost
runner = OpenAIChat(model_id="gpt-3.5-turbo-16k", api_config=API_CONFIG, rate_limit=AtMost(1, "running"))
Output(GenerateText(Template("Output as a latin numeral: {{input}}"))).run(
runner, list(range(1,6))
)
minibatches[###################################################################################]5/5[00:02<00:00, 1.88it/s]
+---------+----------+
| input | output |
+=========+==========+
| 1 | I |
+---------+----------+
| 2 | II |
+---------+----------+
| 3 | III |
+---------+----------+
| 4 | IV |
+---------+----------+
| 5 | V |
+---------+----------+
Constants: None
Or, say you want to issue at most five queries every 10 seconds, while keeping at least 100 ms between consecutive calls.
limits = [AtMost(1, "calls", 0.1), AtMost(5, "calls", 10)]
runner = OpenAIChat(model_id="gpt-3.5-turbo-16k", api_config=API_CONFIG, rate_limit=limits)
Output(GenerateText(Template("Output as a latin numeral: {{input}}"))).run(
runner, list(range(1,6))
)
minibatches[###################################################################################]5/5[00:00<00:00, 5.08it/s]
+---------+----------+
| input | output |
+=========+==========+
| 1 | I |
+---------+----------+
| 2 | II |
+---------+----------+
| 3 | III |
+---------+----------+
| 4 | IV |
+---------+----------+
| 5 | V |
+---------+----------+
Constants: None
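Since all AtMost entries in a list are ANDed together, limit types can also be mixed. For example, a sketch combining only the constructs shown above to cap both concurrency and call volume at once:

from sammo.throttler import AtMost

# At most one request in flight at any time AND at most 5 calls per 10 seconds.
limits = [AtMost(1, "running"), AtMost(5, "calls", 10)]
runner = OpenAIChat(model_id="gpt-3.5-turbo-16k", api_config=API_CONFIG, rate_limit=limits)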