Source code for autogen_ext.models.openai._openai_client
import asyncio
import inspect
import json
import logging
import math
import os
import re
import warnings
from asyncio import Task
from dataclasses import dataclass
from importlib.metadata import PackageNotFoundError, version
from typing import (
    Any,
    AsyncGenerator,
    Callable,
    Dict,
    List,
    Mapping,
    Optional,
    Sequence,
    Set,
    Type,
    Union,
    cast,
)

import tiktoken
from autogen_core import (
    EVENT_LOGGER_NAME,
    TRACE_LOGGER_NAME,
    CancellationToken,
    Component,
    FunctionCall,
    Image,
)
from autogen_core.logging import LLMCallEvent, LLMStreamEndEvent, LLMStreamStartEvent
from autogen_core.models import (
    AssistantMessage,
    ChatCompletionClient,
    ChatCompletionTokenLogprob,
    CreateResult,
    LLMMessage,
    ModelCapabilities,  # type: ignore
    ModelFamily,
    ModelInfo,
    RequestUsage,
    SystemMessage,
    TopLogprob,
    UserMessage,
    validate_model_info,
)
from autogen_core.tools import Tool, ToolSchema
from openai import NOT_GIVEN, AsyncAzureOpenAI, AsyncOpenAI
from openai.types.chat import (
    ChatCompletion,
    ChatCompletionChunk,
    ChatCompletionContentPartParam,
    ChatCompletionMessageParam,
    ChatCompletionRole,
    ChatCompletionToolParam,
    ParsedChatCompletion,
    ParsedChoice,
    completion_create_params,
)
from openai.types.chat.chat_completion import Choice
from openai.types.shared_params import (
    FunctionDefinition,
    FunctionParameters,
    ResponseFormatJSONObject,
    ResponseFormatText,
)
from pydantic import BaseModel, SecretStr
from typing_extensions import Self, Unpack

from .._utils.normalize_stop_reason import normalize_stop_reason
from .._utils.parse_r1_content import parse_r1_content
from . import _model_info
from ._transformation import (
    get_transformer,
)
from ._utils import assert_valid_name
from .config import (
    AzureOpenAIClientConfiguration,
    AzureOpenAIClientConfigurationConfigModel,
    OpenAIClientConfiguration,
    OpenAIClientConfigurationConfigModel,
)

logger = logging.getLogger(EVENT_LOGGER_NAME)
trace_logger = logging.getLogger(TRACE_LOGGER_NAME)

openai_init_kwargs = set(inspect.getfullargspec(AsyncOpenAI.__init__).kwonlyargs)
aopenai_init_kwargs = set(inspect.getfullargspec(AsyncAzureOpenAI.__init__).kwonlyargs)

create_kwargs = set(completion_create_params.CompletionCreateParamsBase.__annotations__.keys()) | set(
    ("timeout", "stream")
)
# Only single choice allowed
disallowed_create_args = set(["stream", "messages", "function_call", "functions", "n"])
required_create_args: Set[str] = set(["model"])

USER_AGENT_HEADER_NAME = "User-Agent"

try:
    version_info = version("autogen-ext")
except PackageNotFoundError:
    version_info = "dev"
AZURE_OPENAI_USER_AGENT = f"autogen-python/{version_info}"


def _azure_openai_client_from_config(config: Mapping[str, Any]) -> AsyncAzureOpenAI:
    # Take a copy
    copied_config = dict(config).copy()
    # Shave down the config to just the AzureOpenAIChatCompletionClient kwargs
    azure_config = {k: v for k, v in copied_config.items() if k in aopenai_init_kwargs}

    DEFAULT_HEADERS_KEY = "default_headers"
    if DEFAULT_HEADERS_KEY not in azure_config:
        azure_config[DEFAULT_HEADERS_KEY] = {}

    azure_config[DEFAULT_HEADERS_KEY][USER_AGENT_HEADER_NAME] = (
        f"{AZURE_OPENAI_USER_AGENT} {azure_config[DEFAULT_HEADERS_KEY][USER_AGENT_HEADER_NAME]}"
        if USER_AGENT_HEADER_NAME in azure_config[DEFAULT_HEADERS_KEY]
        else AZURE_OPENAI_USER_AGENT
    )

    return AsyncAzureOpenAI(**azure_config)


def _openai_client_from_config(config: Mapping[str, Any]) -> AsyncOpenAI:
    # Shave down the config to just the OpenAI kwargs
    openai_config = {k: v for k, v in config.items() if k in openai_init_kwargs}
    return AsyncOpenAI(**openai_config)


def _create_args_from_config(config: Mapping[str, Any]) -> Dict[str, Any]:
    create_args = {k: v for k, v in config.items() if k in create_kwargs}
    create_args_keys = set(create_args.keys())
    if not required_create_args.issubset(create_args_keys):
        raise ValueError(f"Required create args are missing: {required_create_args - create_args_keys}")
    if disallowed_create_args.intersection(create_args_keys):
        raise ValueError(f"Disallowed create args are present: {disallowed_create_args.intersection(create_args_keys)}")
    return create_args


# TODO check types
# oai_system_message_schema = type2schema(ChatCompletionSystemMessageParam)
# oai_user_message_schema = type2schema(ChatCompletionUserMessageParam)
# oai_assistant_message_schema = type2schema(ChatCompletionAssistantMessageParam)
# oai_tool_message_schema = type2schema(ChatCompletionToolMessageParam)


def type_to_role(message: LLMMessage) -> ChatCompletionRole:
    if isinstance(message, SystemMessage):
        return "system"
    elif isinstance(message, UserMessage):
        return "user"
    elif isinstance(message, AssistantMessage):
        return "assistant"
    else:
        return "tool"


def to_oai_type(
    message: LLMMessage, prepend_name: bool = False, model: str = "unknown", model_family: str = ModelFamily.UNKNOWN
) -> Sequence[ChatCompletionMessageParam]:
    context = {
        "prepend_name": prepend_name,
    }
    transformers = get_transformer("openai", model, model_family)

    def raise_value_error(message: LLMMessage, context: Dict[str, Any]) -> Sequence[ChatCompletionMessageParam]:
        raise ValueError(f"Unknown message type: {type(message)}")

    transformer: Callable[[LLMMessage, Dict[str, Any]], Sequence[ChatCompletionMessageParam]] = transformers.get(
        type(message), raise_value_error
    )
    result = transformer(message, context)
    return result


def calculate_vision_tokens(image: Image, detail: str = "auto") -> int:
    MAX_LONG_EDGE = 2048
    BASE_TOKEN_COUNT = 85
    TOKENS_PER_TILE = 170
    MAX_SHORT_EDGE = 768
    TILE_SIZE = 512

    if detail == "low":
        return BASE_TOKEN_COUNT

    width, height = image.image.size

    # Scale down to fit within a MAX_LONG_EDGE x MAX_LONG_EDGE square if necessary
    if width > MAX_LONG_EDGE or height > MAX_LONG_EDGE:
        aspect_ratio = width / height
        if aspect_ratio > 1:
            # Width is greater than height
            width = MAX_LONG_EDGE
            height = int(MAX_LONG_EDGE / aspect_ratio)
        else:
            # Height is greater than or equal to width
            height = MAX_LONG_EDGE
            width = int(MAX_LONG_EDGE * aspect_ratio)

    # Resize such that the shortest side is MAX_SHORT_EDGE if both dimensions exceed MAX_SHORT_EDGE
    aspect_ratio = width / height
    if width > MAX_SHORT_EDGE and height > MAX_SHORT_EDGE:
        if aspect_ratio > 1:
            # Width is greater than height
            height = MAX_SHORT_EDGE
            width = int(MAX_SHORT_EDGE * aspect_ratio)
        else:
            # Height is greater than or equal to width
            width = MAX_SHORT_EDGE
            height = int(MAX_SHORT_EDGE / aspect_ratio)

    # Calculate the number of tiles based on TILE_SIZE
    tiles_width = math.ceil(width / TILE_SIZE)
    tiles_height = math.ceil(height / TILE_SIZE)
    total_tiles = tiles_width * tiles_height
    # Calculate the total tokens based on the number of tiles and the base token count
    total_tokens = BASE_TOKEN_COUNT + TOKENS_PER_TILE * total_tiles
    return total_tokens


def _add_usage(usage1: RequestUsage, usage2: RequestUsage) -> RequestUsage:
    return RequestUsage(
        prompt_tokens=usage1.prompt_tokens + usage2.prompt_tokens,
        completion_tokens=usage1.completion_tokens + usage2.completion_tokens,
    )


def convert_tools(
    tools: Sequence[Tool | ToolSchema],
) -> List[ChatCompletionToolParam]:
    result: List[ChatCompletionToolParam] = []
    for tool in tools:
        if isinstance(tool, Tool):
            tool_schema = tool.schema
        else:
            assert isinstance(tool, dict)
            tool_schema = tool
        result.append(
            ChatCompletionToolParam(
                type="function",
                function=FunctionDefinition(
                    name=tool_schema["name"],
                    description=(tool_schema["description"] if "description" in tool_schema else ""),
                    parameters=(
                        cast(FunctionParameters, tool_schema["parameters"]) if "parameters" in tool_schema else {}
                    ),
                    strict=(tool_schema["strict"] if "strict" in tool_schema else False),
                ),
            )
        )
    # Check if all tools have valid names.
    for tool_param in result:
        assert_valid_name(tool_param["function"]["name"])
    return result


def normalize_name(name: str) -> str:
    """
    LLMs sometimes call functions while ignoring their own format requirements; this function replaces invalid characters with "_".

    Prefer _assert_valid_name for validating user configuration or input.
    """
    return re.sub(r"[^a-zA-Z0-9_-]", "_", name)[:64]


def count_tokens_openai(
    messages: Sequence[LLMMessage],
    model: str,
    *,
    add_name_prefixes: bool = False,
    tools: Sequence[Tool | ToolSchema] = [],
    model_family: str = ModelFamily.UNKNOWN,
) -> int:
    try:
        encoding = tiktoken.encoding_for_model(model)
    except KeyError:
        trace_logger.warning(f"Model {model} not found. Using cl100k_base encoding.")
        encoding = tiktoken.get_encoding("cl100k_base")
    tokens_per_message = 3
    tokens_per_name = 1
    num_tokens = 0

    # Message tokens.
    for message in messages:
        num_tokens += tokens_per_message
        oai_message = to_oai_type(message, prepend_name=add_name_prefixes, model=model, model_family=model_family)
        for oai_message_part in oai_message:
            for key, value in oai_message_part.items():
                if value is None:
                    continue

                if isinstance(message, UserMessage) and isinstance(value, list):
                    typed_message_value = cast(List[ChatCompletionContentPartParam], value)

                    assert len(typed_message_value) == len(
                        message.content
                    ), "Mismatch in message content and typed message value"

                    # We need image properties that are only in the original message
                    for part, content_part in zip(typed_message_value, message.content, strict=False):
                        if isinstance(content_part, Image):
                            # TODO: add detail parameter
                            num_tokens += calculate_vision_tokens(content_part)
                        elif isinstance(part, str):
                            num_tokens += len(encoding.encode(part))
                        else:
                            try:
                                serialized_part = json.dumps(part)
                                num_tokens += len(encoding.encode(serialized_part))
                            except TypeError:
                                trace_logger.warning(f"Could not convert {part} to string, skipping.")
                else:
                    if not isinstance(value, str):
                        try:
                            value = json.dumps(value)
                        except TypeError:
                            trace_logger.warning(f"Could not convert {value} to string, skipping.")
                            continue
                    num_tokens += len(encoding.encode(value))
                    if key == "name":
                        num_tokens += tokens_per_name
    num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>

    # Tool tokens.
    oai_tools = convert_tools(tools)
    for tool in oai_tools:
        function = tool["function"]
        tool_tokens = len(encoding.encode(function["name"]))
        if "description" in function:
            tool_tokens += len(encoding.encode(function["description"]))
        tool_tokens -= 2
        if "parameters" in function:
            parameters = function["parameters"]
            if "properties" in parameters:
                assert isinstance(parameters["properties"], dict)
                for propertiesKey in parameters["properties"]:  # pyright: ignore
                    assert isinstance(propertiesKey, str)
                    tool_tokens += len(encoding.encode(propertiesKey))
                    v = parameters["properties"][propertiesKey]  # pyright: ignore
                    for field in v:  # pyright: ignore
                        if field == "type":
                            tool_tokens += 2
                            tool_tokens += len(encoding.encode(v["type"]))  # pyright: ignore
                        elif field == "description":
                            tool_tokens += 2
                            tool_tokens += len(encoding.encode(v["description"]))  # pyright: ignore
                        elif field == "enum":
                            tool_tokens -= 3
                            for o in v["enum"]:  # pyright: ignore
                                tool_tokens += 3
                                tool_tokens += len(encoding.encode(o))  # pyright: ignore
                        else:
                            trace_logger.warning(f"Not supported field {field}")
                tool_tokens += 11
                if len(parameters["properties"]) == 0:  # pyright: ignore
                    tool_tokens -= 2
        num_tokens += tool_tokens
    num_tokens += 12
    return num_tokens


@dataclass
class CreateParams:
    messages: List[ChatCompletionMessageParam]
    tools: List[ChatCompletionToolParam]
    response_format: Optional[Type[BaseModel]]
    create_args: Dict[str, Any]
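The module-level helpers above are internal, but a brief usage sketch may clarify how `convert_tools` and `count_tokens_openai` fit together. This is an illustrative example only, not part of the module; the private import path and the tool schema shown below are assumptions and may change between releases.

    from autogen_core.models import SystemMessage, UserMessage
    from autogen_ext.models.openai._openai_client import convert_tools, count_tokens_openai

    # A plain ToolSchema dict; a Tool instance (e.g. a FunctionTool) works the same way.
    weather_tool = {
        "name": "get_weather",
        "description": "Get the weather for a city.",
        "parameters": {"type": "object", "properties": {"city": {"type": "string"}}},
    }

    messages = [
        SystemMessage(content="You are a helpful assistant."),
        UserMessage(content="What is the weather in Paris?", source="user"),
    ]

    # convert_tools normalizes Tool/ToolSchema inputs into OpenAI ChatCompletionToolParam dicts.
    print(convert_tools([weather_tool])[0]["function"]["name"])  # -> "get_weather"

    # count_tokens_openai estimates prompt tokens for the messages plus the tool definitions.
    print(count_tokens_openai(messages, "gpt-4o", tools=[weather_tool]))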
class BaseOpenAIChatCompletionClient(ChatCompletionClient):
    def __init__(
        self,
        client: Union[AsyncOpenAI, AsyncAzureOpenAI],
        *,
        create_args: Dict[str, Any],
        model_capabilities: Optional[ModelCapabilities] = None,  # type: ignore
        model_info: Optional[ModelInfo] = None,
        add_name_prefixes: bool = False,
    ):
        self._client = client
        self._add_name_prefixes = add_name_prefixes
        if model_capabilities is None and model_info is None:
            try:
                self._model_info = _model_info.get_info(create_args["model"])
            except KeyError as err:
                raise ValueError("model_info is required when model name is not a valid OpenAI model") from err
        elif model_capabilities is not None and model_info is not None:
            raise ValueError("model_capabilities and model_info are mutually exclusive")
        elif model_capabilities is not None and model_info is None:
            warnings.warn(
                "model_capabilities is deprecated, use model_info instead",
                DeprecationWarning,
                stacklevel=2,
            )
            info = cast(ModelInfo, model_capabilities)
            info["family"] = ModelFamily.UNKNOWN
            self._model_info = info
        elif model_capabilities is None and model_info is not None:
            self._model_info = model_info

        # Validate model_info, check if all required fields are present
        validate_model_info(self._model_info)

        self._resolved_model: Optional[str] = None
        if "model" in create_args:
            self._resolved_model = _model_info.resolve_model(create_args["model"])

        if (
            not self._model_info["json_output"]
            and "response_format" in create_args
            and (
                isinstance(create_args["response_format"], dict)
                and create_args["response_format"]["type"] == "json_object"
            )
        ):
            raise ValueError("Model does not support JSON output.")

        self._create_args = create_args
        self._total_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
        self._actual_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
    def _rstrip_last_assistant_message(self, messages: Sequence[LLMMessage]) -> Sequence[LLMMessage]:
        """
        Strip trailing whitespace from the last assistant message, if its content is a string.
        """
        # When the last message sent to a Claude model is an AssistantMessage, it must not end with whitespace.
        if isinstance(messages[-1], AssistantMessage):
            if isinstance(messages[-1].content, str):
                messages[-1].content = messages[-1].content.rstrip()

        return messages

    def _process_create_args(
        self,
        messages: Sequence[LLMMessage],
        tools: Sequence[Tool | ToolSchema],
        json_output: Optional[bool | type[BaseModel]],
        extra_create_args: Mapping[str, Any],
    ) -> CreateParams:
        # Make sure all extra_create_args are valid
        extra_create_args_keys = set(extra_create_args.keys())
        if not create_kwargs.issuperset(extra_create_args_keys):
            raise ValueError(f"Extra create args are invalid: {extra_create_args_keys - create_kwargs}")

        # Copy the create args and overwrite anything in extra_create_args
        create_args = self._create_args.copy()
        create_args.update(extra_create_args)

        # The response format value to use for the beta client.
        response_format_value: Optional[Type[BaseModel]] = None

        if "response_format" in create_args:
            # Legacy support for getting beta client mode from response_format.
            value = create_args["response_format"]
            if isinstance(value, type) and issubclass(value, BaseModel):
                if self.model_info["structured_output"] is False:
                    raise ValueError("Model does not support structured output.")
                warnings.warn(
                    "Using response_format to specify the BaseModel for structured output type will be deprecated. "
                    "Use json_output in create and create_stream instead.",
                    DeprecationWarning,
                    stacklevel=2,
                )
                response_format_value = value
                # Remove response_format from create_args to prevent passing it twice.
                del create_args["response_format"]
            # In all other cases when response_format is set to something else, we will
            # use the regular client.

        if json_output is not None:
            if self.model_info["json_output"] is False and json_output is True:
                raise ValueError("Model does not support JSON output.")

            if json_output is True:
                # JSON mode.
                create_args["response_format"] = ResponseFormatJSONObject(type="json_object")
            elif json_output is False:
                # Text mode.
                create_args["response_format"] = ResponseFormatText(type="text")
            elif isinstance(json_output, type) and issubclass(json_output, BaseModel):
                if self.model_info["structured_output"] is False:
                    raise ValueError("Model does not support structured output.")
                if response_format_value is not None:
                    raise ValueError(
                        "response_format and json_output cannot be set to a Pydantic model class at the same time."
                    )
                # Beta client mode with Pydantic model class.
                response_format_value = json_output
            else:
                raise ValueError(f"json_output must be a boolean or a Pydantic model class, got {type(json_output)}")

        if response_format_value is not None and "response_format" in create_args:
            warnings.warn(
                "response_format is found in extra_create_args while json_output is set to a Pydantic model class. "
                "Skipping the response_format in extra_create_args in favor of the json_output. "
                "Structured output will be used.",
                UserWarning,
                stacklevel=2,
            )
            # If using the beta client, remove response_format from create_args to prevent passing it twice.
            del create_args["response_format"]

        # TODO: allow custom handling.
        # For now we raise an error if images are present and vision is not supported.
        if self.model_info["vision"] is False:
            for message in messages:
                if isinstance(message, UserMessage):
                    if isinstance(message.content, list) and any(isinstance(x, Image) for x in message.content):
                        raise ValueError("Model does not support vision and image was provided")

        if self.model_info["json_output"] is False and json_output is True:
            raise ValueError("Model does not support JSON output.")

        if not self.model_info.get("multiple_system_messages", False):
            # Some models accept only one system message (or only read the last one),
            # so merge system messages into one if they are multiple and contiguous.
            system_message_content = ""
            _messages: List[LLMMessage] = []
            _first_system_message_idx = -1
            _last_system_message_idx = -1
            # Index of the first system message, for inserting the merged system message at the correct position.
            for idx, message in enumerate(messages):
                if isinstance(message, SystemMessage):
                    if _first_system_message_idx == -1:
                        _first_system_message_idx = idx
                    elif _last_system_message_idx + 1 != idx:
                        # The system messages are not contiguous; only contiguous system messages are merged.
                        raise ValueError(
                            "Multiple and non-continuous system messages are not supported if model_info['multiple_system_messages'] is False"
                        )
                    system_message_content += message.content + "\n"
                    _last_system_message_idx = idx
                else:
                    _messages.append(message)
            system_message_content = system_message_content.rstrip()
            if system_message_content != "":
                system_message = SystemMessage(content=system_message_content)
                _messages.insert(_first_system_message_idx, system_message)
            messages = _messages

        # As an ad-hoc check, use startswith here instead of model_family for code consistency.
        if create_args.get("model", "unknown").startswith("claude-"):
            # When the last message for a Claude model is an AssistantMessage, it must not end with whitespace.
            messages = self._rstrip_last_assistant_message(messages)

        oai_messages_nested = [
            to_oai_type(
                m,
                prepend_name=self._add_name_prefixes,
                model=create_args.get("model", "unknown"),
                model_family=self._model_info["family"],
            )
            for m in messages
        ]

        oai_messages = [item for sublist in oai_messages_nested for item in sublist]

        if self.model_info["function_calling"] is False and len(tools) > 0:
            raise ValueError("Model does not support function calling")

        converted_tools = convert_tools(tools)
        return CreateParams(
            messages=oai_messages,
            tools=converted_tools,
            response_format=response_format_value,
            create_args=create_args,
        )
    async def create(
        self,
        messages: Sequence[LLMMessage],
        *,
        tools: Sequence[Tool | ToolSchema] = [],
        json_output: Optional[bool | type[BaseModel]] = None,
        extra_create_args: Mapping[str, Any] = {},
        cancellation_token: Optional[CancellationToken] = None,
    ) -> CreateResult:
        create_params = self._process_create_args(
            messages,
            tools,
            json_output,
            extra_create_args,
        )
        future: Union[Task[ParsedChatCompletion[BaseModel]], Task[ChatCompletion]]
        if create_params.response_format is not None:
            # Use the beta client if response_format is not None.
            future = asyncio.ensure_future(
                self._client.beta.chat.completions.parse(
                    messages=create_params.messages,
                    tools=(create_params.tools if len(create_params.tools) > 0 else NOT_GIVEN),
                    response_format=create_params.response_format,
                    **create_params.create_args,
                )
            )
        else:
            # Use the regular client.
            future = asyncio.ensure_future(
                self._client.chat.completions.create(
                    messages=create_params.messages,
                    stream=False,
                    tools=(create_params.tools if len(create_params.tools) > 0 else NOT_GIVEN),
                    **create_params.create_args,
                )
            )
        if cancellation_token is not None:
            cancellation_token.link_future(future)
        result: Union[ParsedChatCompletion[BaseModel], ChatCompletion] = await future
        if create_params.response_format is not None:
            result = cast(ParsedChatCompletion[Any], result)

        usage = RequestUsage(
            # TODO backup token counting
            prompt_tokens=result.usage.prompt_tokens if result.usage is not None else 0,
            completion_tokens=(result.usage.completion_tokens if result.usage is not None else 0),
        )

        logger.info(
            LLMCallEvent(
                messages=cast(List[Dict[str, Any]], create_params.messages),
                response=result.model_dump(),
                prompt_tokens=usage.prompt_tokens,
                completion_tokens=usage.completion_tokens,
            )
        )

        if self._resolved_model is not None:
            if self._resolved_model != result.model:
                warnings.warn(
                    f"Resolved model mismatch: {self._resolved_model} != {result.model}. "
                    "Model mapping in autogen_ext.models.openai may be incorrect. "
                    f"Set the model to {result.model} to enhance token/cost estimation and suppress this warning.",
                    stacklevel=2,
                )

        # Limited to a single choice currently.
        choice: Union[ParsedChoice[Any], ParsedChoice[BaseModel], Choice] = result.choices[0]

        # Detect whether it is a function call or not.
        # We don't rely on choice.finish_reason as it is not always accurate, depending on the API used.
        content: Union[str, List[FunctionCall]]
        thought: str | None = None
        if choice.message.function_call is not None:
            raise ValueError("function_call is deprecated and is not supported by this model client.")
        elif choice.message.tool_calls is not None and len(choice.message.tool_calls) > 0:
            if choice.finish_reason != "tool_calls":
                warnings.warn(
                    f"Finish reason mismatch: {choice.finish_reason} != tool_calls "
                    "when tool_calls are present. Finish reason may not be accurate. "
                    "This may be due to the API used that is not returning the correct finish reason.",
                    stacklevel=2,
                )
            if choice.message.content is not None and choice.message.content != "":
                # Put the content in the thought field.
                thought = choice.message.content
            # NOTE: If the OAI response type changes, this will need to be updated.
            content = []
            for tool_call in choice.message.tool_calls:
                if not isinstance(tool_call.function.arguments, str):
                    warnings.warn(
                        f"Tool call function arguments field is not a string: {tool_call.function.arguments}. "
                        "This is unexpected and may be due to the API used not returning the correct type. "
                        "Attempting to convert it to string.",
                        stacklevel=2,
                    )
                    if isinstance(tool_call.function.arguments, dict):
                        tool_call.function.arguments = json.dumps(tool_call.function.arguments)
                content.append(
                    FunctionCall(
                        id=tool_call.id,
                        arguments=tool_call.function.arguments,
                        name=normalize_name(tool_call.function.name),
                    )
                )
            finish_reason = "tool_calls"
        else:
            # If there are no tool_calls, then it is a text response and we populate the content and thought fields.
            finish_reason = choice.finish_reason
            content = choice.message.content or ""
            # If there is a reasoning_content field, then we populate the thought field.
            # This is for models such as R1, served directly from the DeepSeek API.
            if choice.message.model_extra is not None:
                reasoning_content = choice.message.model_extra.get("reasoning_content")
                if reasoning_content is not None:
                    thought = reasoning_content

        logprobs: Optional[List[ChatCompletionTokenLogprob]] = None
        if choice.logprobs and choice.logprobs.content:
            logprobs = [
                ChatCompletionTokenLogprob(
                    token=x.token,
                    logprob=x.logprob,
                    top_logprobs=[TopLogprob(logprob=y.logprob, bytes=y.bytes) for y in x.top_logprobs],
                    bytes=x.bytes,
                )
                for x in choice.logprobs.content
            ]

        # This is for local R1 models.
        if isinstance(content, str) and self._model_info["family"] == ModelFamily.R1 and thought is None:
            thought, content = parse_r1_content(content)

        response = CreateResult(
            finish_reason=normalize_stop_reason(finish_reason),
            content=content,
            usage=usage,
            cached=False,
            logprobs=logprobs,
            thought=thought,
        )

        self._total_usage = _add_usage(self._total_usage, usage)
        self._actual_usage = _add_usage(self._actual_usage, usage)

        # TODO - why is this cast needed?
        return response
    async def create_stream(
        self,
        messages: Sequence[LLMMessage],
        *,
        tools: Sequence[Tool | ToolSchema] = [],
        json_output: Optional[bool | type[BaseModel]] = None,
        extra_create_args: Mapping[str, Any] = {},
        cancellation_token: Optional[CancellationToken] = None,
        max_consecutive_empty_chunk_tolerance: int = 0,
    ) -> AsyncGenerator[Union[str, CreateResult], None]:
        """Create a stream of string chunks from the model, ending with a :class:`~autogen_core.models.CreateResult`.

        Extends :meth:`autogen_core.models.ChatCompletionClient.create_stream` to support the OpenAI API.

        In streaming, the default behaviour is not to return token usage counts.
        See: `OpenAI API reference for possible args <https://platform.openai.com/docs/api-reference/chat/create>`_.

        You can set `extra_create_args={"stream_options": {"include_usage": True}}` (if supported by the accessed API)
        to return a final chunk with usage set to a :class:`~autogen_core.models.RequestUsage` object
        with prompt and completion token counts; all preceding chunks will have usage as `None`.
        See: `OpenAI API reference for stream options <https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options>`_.

        Other examples of supported arguments that can be included in `extra_create_args`:
            - `temperature` (float): Controls the randomness of the output. Higher values (e.g., 0.8) make the output more random, while lower values (e.g., 0.2) make it more focused and deterministic.
            - `max_tokens` (int): The maximum number of tokens to generate in the completion.
            - `top_p` (float): An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass.
            - `frequency_penalty` (float): A value between -2.0 and 2.0 that penalizes new tokens based on their existing frequency in the text so far, decreasing the likelihood of repeated phrases.
            - `presence_penalty` (float): A value between -2.0 and 2.0 that penalizes new tokens based on whether they appear in the text so far, encouraging the model to talk about new topics.
        """
        create_params = self._process_create_args(
            messages,
            tools,
            json_output,
            extra_create_args,
        )
        if max_consecutive_empty_chunk_tolerance != 0:
            warnings.warn(
                "The 'max_consecutive_empty_chunk_tolerance' parameter is deprecated and will be removed in future releases. "
                "All empty chunks will be skipped with a warning.",
                DeprecationWarning,
                stacklevel=2,
            )

        if create_params.response_format is not None:
            chunks = self._create_stream_chunks_beta_client(
                tool_params=create_params.tools,
                oai_messages=create_params.messages,
                response_format=create_params.response_format,
                create_args_no_response_format=create_params.create_args,
                cancellation_token=cancellation_token,
            )
        else:
            chunks = self._create_stream_chunks(
                tool_params=create_params.tools,
                oai_messages=create_params.messages,
                create_args=create_params.create_args,
                cancellation_token=cancellation_token,
            )

        # Prepare data to process streaming chunks.
        chunk: ChatCompletionChunk | None = None
        stop_reason = None
        maybe_model = None
        content_deltas: List[str] = []
        thought_deltas: List[str] = []
        full_tool_calls: Dict[int, FunctionCall] = {}
        logprobs: Optional[List[ChatCompletionTokenLogprob]] = None

        empty_chunk_warning_has_been_issued: bool = False
        empty_chunk_warning_threshold: int = 10
        empty_chunk_count = 0
        first_chunk = True
        is_reasoning = False

        # Process the stream of chunks.
        async for chunk in chunks:
            if first_chunk:
                first_chunk = False
                # Emit the start event.
                logger.info(
                    LLMStreamStartEvent(
                        messages=cast(List[Dict[str, Any]], create_params.messages),
                    )
                )

            # Set the model from the latest chunk.
            maybe_model = chunk.model

            # Empty chunks have been observed when the endpoint is under heavy load.
            # https://github.com/microsoft/autogen/issues/4213
            if len(chunk.choices) == 0:
                empty_chunk_count += 1
                if not empty_chunk_warning_has_been_issued and empty_chunk_count >= empty_chunk_warning_threshold:
                    empty_chunk_warning_has_been_issued = True
                    warnings.warn(
                        f"Received more than {empty_chunk_warning_threshold} consecutive empty chunks. Empty chunks are being ignored.",
                        stacklevel=2,
                    )
                continue
            else:
                empty_chunk_count = 0

            if len(chunk.choices) > 1:
                # This is a multi-choice chunk, we need to warn the user.
                warnings.warn(
                    f"Received a chunk with {len(chunk.choices)} choices. Only the first choice will be used.",
                    UserWarning,
                    stacklevel=2,
                )

            # Set the choice to the first choice in the chunk.
            choice = chunk.choices[0]

            # For liteLLM chunk usage, apply the following hack: keep the previous chunk's stop_reason (if set),
            # i.e. set the stop_reason for the usage chunk to the prior stop_reason.
            stop_reason = choice.finish_reason if chunk.usage is None and stop_reason is None else stop_reason
            maybe_model = chunk.model

            reasoning_content: str | None = None
            if choice.delta.model_extra is not None and "reasoning_content" in choice.delta.model_extra:
                # If there is a reasoning_content field, then we populate the thought field.
                # This is for models such as R1.
                reasoning_content = choice.delta.model_extra.get("reasoning_content")

            if isinstance(reasoning_content, str) and len(reasoning_content) > 0:
                if not is_reasoning:
                    # Enter reasoning mode.
                    reasoning_content = "<think>" + reasoning_content
                    is_reasoning = True
                thought_deltas.append(reasoning_content)
                yield reasoning_content
            elif is_reasoning:
                # Exit reasoning mode.
                reasoning_content = "</think>"
                thought_deltas.append(reasoning_content)
                is_reasoning = False
                yield reasoning_content

            # First, try to get the content.
            if choice.delta.content:
                content_deltas.append(choice.delta.content)
                if len(choice.delta.content) > 0:
                    yield choice.delta.content
                # NOTE: for OpenAI, tool_calls and content are mutually exclusive it seems, so we can skip the rest of the loop.
                # However, this may not be the case for other APIs -- we should expect this may need to be updated.
                continue

            # Otherwise, get the tool calls.
            if choice.delta.tool_calls is not None:
                for tool_call_chunk in choice.delta.tool_calls:
                    idx = tool_call_chunk.index
                    if idx not in full_tool_calls:
                        # We ignore the type hint here because we want to fill in the type when the delta provides it.
                        full_tool_calls[idx] = FunctionCall(id="", arguments="", name="")

                    if tool_call_chunk.id is not None:
                        full_tool_calls[idx].id += tool_call_chunk.id

                    if tool_call_chunk.function is not None:
                        if tool_call_chunk.function.name is not None:
                            full_tool_calls[idx].name += tool_call_chunk.function.name
                        if tool_call_chunk.function.arguments is not None:
                            full_tool_calls[idx].arguments += tool_call_chunk.function.arguments
            if choice.logprobs and choice.logprobs.content:
                logprobs = [
                    ChatCompletionTokenLogprob(
                        token=x.token,
                        logprob=x.logprob,
                        top_logprobs=[TopLogprob(logprob=y.logprob, bytes=y.bytes) for y in x.top_logprobs],
                        bytes=x.bytes,
                    )
                    for x in choice.logprobs.content
                ]

        # Finalize the CreateResult.

        # TODO: can we remove this?
        if stop_reason == "function_call":
            raise ValueError("Function calls are not supported in this context")

        # We need to get the model from the last chunk, if available.
        model = maybe_model or create_params.create_args["model"]
        model = model.replace("gpt-35", "gpt-3.5")  # hack for Azure API

        # Because the usage chunk is not guaranteed to be the last chunk, we need to check if it is available.
        if chunk and chunk.usage:
            prompt_tokens = chunk.usage.prompt_tokens
            completion_tokens = chunk.usage.completion_tokens
        else:
            prompt_tokens = 0
            completion_tokens = 0
        usage = RequestUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )

        # Detect whether it is a function call or just text.
        content: Union[str, List[FunctionCall]]
        thought: str | None = None
        # Determine the content and thought based on what was collected.
        if full_tool_calls:
            # This is a tool call response.
            content = list(full_tool_calls.values())
            if content_deltas:
                # Store any text alongside tool calls as thoughts.
                thought = "".join(content_deltas)
        else:
            # This is a text response (possibly with thoughts).
            if content_deltas:
                content = "".join(content_deltas)
            else:
                warnings.warn(
                    "No text content or tool calls are available. Model returned empty result.",
                    stacklevel=2,
                )
                content = ""

            # Set thoughts if we have any reasoning content.
            if thought_deltas:
                thought = "".join(thought_deltas).lstrip("<think>").rstrip("</think>")

        # This is for local R1 models whose reasoning content is within the content string.
        if isinstance(content, str) and self._model_info["family"] == ModelFamily.R1 and thought is None:
            thought, content = parse_r1_content(content)

        # Create the result.
        result = CreateResult(
            finish_reason=normalize_stop_reason(stop_reason),
            content=content,
            usage=usage,
            cached=False,
            logprobs=logprobs,
            thought=thought,
        )

        # Log the end of the stream.
        logger.info(
            LLMStreamEndEvent(
                response=result.model_dump(),
                prompt_tokens=usage.prompt_tokens,
                completion_tokens=usage.completion_tokens,
            )
        )

        # Update the total usage.
        self._total_usage = _add_usage(self._total_usage, usage)
        self._actual_usage = _add_usage(self._actual_usage, usage)

        # Yield the CreateResult.
        yield result
    async def _create_stream_chunks(
        self,
        tool_params: List[ChatCompletionToolParam],
        oai_messages: List[ChatCompletionMessageParam],
        create_args: Dict[str, Any],
        cancellation_token: Optional[CancellationToken],
    ) -> AsyncGenerator[ChatCompletionChunk, None]:
        stream_future = asyncio.ensure_future(
            self._client.chat.completions.create(
                messages=oai_messages,
                stream=True,
                tools=tool_params if len(tool_params) > 0 else NOT_GIVEN,
                **create_args,
            )
        )
        if cancellation_token is not None:
            cancellation_token.link_future(stream_future)
        stream = await stream_future
        while True:
            try:
                chunk_future = asyncio.ensure_future(anext(stream))
                if cancellation_token is not None:
                    cancellation_token.link_future(chunk_future)
                chunk = await chunk_future
                yield chunk
            except StopAsyncIteration:
                break

    async def _create_stream_chunks_beta_client(
        self,
        tool_params: List[ChatCompletionToolParam],
        oai_messages: List[ChatCompletionMessageParam],
        create_args_no_response_format: Dict[str, Any],
        response_format: Optional[Type[BaseModel]],
        cancellation_token: Optional[CancellationToken],
    ) -> AsyncGenerator[ChatCompletionChunk, None]:
        async with self._client.beta.chat.completions.stream(
            messages=oai_messages,
            tools=tool_params if len(tool_params) > 0 else NOT_GIVEN,
            response_format=(response_format if response_format is not None else NOT_GIVEN),
            **create_args_no_response_format,
        ) as stream:
            while True:
                try:
                    event_future = asyncio.ensure_future(anext(stream))
                    if cancellation_token is not None:
                        cancellation_token.link_future(event_future)
                    event = await event_future

                    if event.type == "chunk":
                        chunk = event.chunk
                        yield chunk
                    # We don't handle other event types from the beta client stream,
                    # as the other event types are auxiliary to the chunk event.
                    # See: https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events
                    # Once the beta client is stable, we can move all the logic to the beta client.
                    # Then we can consider handling other event types, which may simplify the code overall.
                except StopAsyncIteration:
                    break
    @property
    def capabilities(self) -> ModelCapabilities:  # type: ignore
        warnings.warn(
            "capabilities is deprecated, use model_info instead",
            DeprecationWarning,
            stacklevel=2,
        )
        return self._model_info

    @property
    def model_info(self) -> ModelInfo:
        return self._model_info
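Before moving on to the concrete clients, here is a small sketch of how the `model_info` property above is typically consulted before requesting structured output. This is illustrative only and not part of the module; the model schema and helper function name are placeholders.

    from pydantic import BaseModel

    from autogen_core.models import UserMessage
    from autogen_ext.models.openai import OpenAIChatCompletionClient


    class Capital(BaseModel):
        city: str


    async def ask_capital(client: OpenAIChatCompletionClient) -> None:
        question = [UserMessage(content="What is the capital of France?", source="user")]
        if client.model_info["structured_output"]:
            # A Pydantic class routes the request through the beta parse client (see _process_create_args above).
            result = await client.create(question, json_output=Capital)
        else:
            # Fall back to plain text for models without structured output support.
            result = await client.create(question)
        print(result.content)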
class OpenAIChatCompletionClient(BaseOpenAIChatCompletionClient, Component[OpenAIClientConfigurationConfigModel]):
    """Chat completion client for OpenAI hosted models.

    To use this client, you must install the `openai` extra:

    .. code-block:: bash

        pip install "autogen-ext[openai]"

    You can also use this client for OpenAI-compatible ChatCompletion endpoints.
    **Using this client for non-OpenAI models is not tested or guaranteed.**

    For non-OpenAI models, please first take a look at our `community extensions <https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html>`_
    for additional model clients.

    Args:
        model (str): Which OpenAI model to use.
        api_key (optional, str): The API key to use. **Required if 'OPENAI_API_KEY' is not found in the environment variables.**
        organization (optional, str): The organization ID to use.
        base_url (optional, str): The base URL to use. **Required if the model is not hosted on OpenAI.**
        timeout (optional, float): The timeout for the request in seconds.
        max_retries (optional, int): The maximum number of retries to attempt.
        model_info (optional, ModelInfo): The capabilities of the model. **Required if the model name is not a valid OpenAI model.**
        frequency_penalty (optional, float):
        logit_bias (optional, dict[str, int]):
        max_tokens (optional, int):
        n (optional, int):
        presence_penalty (optional, float):
        response_format (optional, Dict[str, Any]): the format of the response. Possible options are:

            .. code-block:: text

                # Text response, this is the default.
                {"type": "text"}

            .. code-block:: text

                # JSON response, make sure to instruct the model to return JSON.
                {"type": "json_object"}

            .. code-block:: text

                # Structured output response, with a pre-defined JSON schema.
                {
                    "type": "json_schema",
                    "json_schema": {
                        "name": "name of the schema, must be an identifier.",
                        "description": "description for the model.",
                        # You can convert a Pydantic (v2) model to JSON schema
                        # using the `model_json_schema()` method.
                        "schema": "<the JSON schema itself>",
                        # Whether to enable strict schema adherence when
                        # generating the output. If set to true, the model will
                        # always follow the exact schema defined in the
                        # `schema` field. Only a subset of JSON Schema is
                        # supported when `strict` is `true`.
                        # To learn more, read
                        # https://platform.openai.com/docs/guides/structured-outputs.
                        "strict": False,  # or True
                    },
                }

            It is recommended to use the `json_output` parameter in
            :meth:`~autogen_ext.models.openai.BaseOpenAIChatCompletionClient.create` or
            :meth:`~autogen_ext.models.openai.BaseOpenAIChatCompletionClient.create_stream` methods instead of `response_format`
            for structured output. The `json_output` parameter is more flexible and allows you to specify a Pydantic model class directly.

        seed (optional, int):
        stop (optional, str | List[str]):
        temperature (optional, float):
        top_p (optional, float):
        user (optional, str):
        default_headers (optional, dict[str, str]): Custom headers; useful for authentication or other custom requirements.
        add_name_prefixes (optional, bool): Whether to prepend the `source` value
            to each :class:`~autogen_core.models.UserMessage` content. E.g.,
            "this is content" becomes "Reviewer said: this is content."
            This can be useful for models that do not support the `name` field in
            messages. Defaults to False.
        stream_options (optional, dict): Additional options for streaming. Currently only `include_usage` is supported.

    Examples:

        The following code snippet shows how to use the client with an OpenAI model:

        .. code-block:: python

            from autogen_ext.models.openai import OpenAIChatCompletionClient
            from autogen_core.models import UserMessage

            openai_client = OpenAIChatCompletionClient(
                model="gpt-4o-2024-08-06",
                # api_key="sk-...", # Optional if you have an OPENAI_API_KEY environment variable set.
            )

            result = await openai_client.create([UserMessage(content="What is the capital of France?", source="user")])  # type: ignore
            print(result)

            # Close the client when done.
            # await openai_client.close()

        To use the client with a non-OpenAI model, you need to provide the base URL of the model and the model info.
        For example, to use Ollama, you can use the following code snippet:

        .. code-block:: python

            from autogen_ext.models.openai import OpenAIChatCompletionClient
            from autogen_core.models import ModelFamily

            custom_model_client = OpenAIChatCompletionClient(
                model="deepseek-r1:1.5b",
                base_url="http://localhost:11434/v1",
                api_key="placeholder",
                model_info={
                    "vision": False,
                    "function_calling": False,
                    "json_output": False,
                    "family": ModelFamily.R1,
                    "structured_output": True,
                },
            )

            # Close the client when done.
            # await custom_model_client.close()

        To use streaming mode, you can use the following code snippet:

        .. code-block:: python

            import asyncio
            from autogen_core.models import UserMessage
            from autogen_ext.models.openai import OpenAIChatCompletionClient


            async def main() -> None:
                # Similar for AzureOpenAIChatCompletionClient.
                model_client = OpenAIChatCompletionClient(model="gpt-4o")  # assuming OPENAI_API_KEY is set in the environment.

                messages = [UserMessage(content="Write a very short story about a dragon.", source="user")]

                # Create a stream.
                stream = model_client.create_stream(messages=messages)

                # Iterate over the stream and print the responses.
                print("Streamed responses:")
                async for response in stream:
                    if isinstance(response, str):
                        # A partial response is a string.
                        print(response, flush=True, end="")
                    else:
                        # The last response is a CreateResult object with the complete message.
                        print("\\n\\n------------\\n")
                        print("The complete response:", flush=True)
                        print(response.content, flush=True)

                # Close the client when done.
                await model_client.close()


            asyncio.run(main())

        To use structured output as well as function calling, you can use the following code snippet:

        .. code-block:: python

            import asyncio
            from typing import Literal

            from autogen_core.models import (
                AssistantMessage,
                FunctionExecutionResult,
                FunctionExecutionResultMessage,
                SystemMessage,
                UserMessage,
            )
            from autogen_core.tools import FunctionTool
            from autogen_ext.models.openai import OpenAIChatCompletionClient
            from pydantic import BaseModel


            # Define the structured output format.
            class AgentResponse(BaseModel):
                thoughts: str
                response: Literal["happy", "sad", "neutral"]


            # Define the function to be called as a tool.
            def sentiment_analysis(text: str) -> str:
                \"\"\"Given a text, return the sentiment.\"\"\"
                return "happy" if "happy" in text else "sad" if "sad" in text else "neutral"


            # Create a FunctionTool instance with `strict=True`,
            # which is required for structured output mode.
            tool = FunctionTool(sentiment_analysis, description="Sentiment Analysis", strict=True)


            async def main() -> None:
                # Create an OpenAIChatCompletionClient instance.
                model_client = OpenAIChatCompletionClient(model="gpt-4o-mini")

                # Generate a response using the tool.
                response1 = await model_client.create(
                    messages=[
                        SystemMessage(content="Analyze input text sentiment using the tool provided."),
                        UserMessage(content="I am happy.", source="user"),
                    ],
                    tools=[tool],
                )
                print(response1.content)
                # Should be a list of tool calls.
                # [FunctionCall(name="sentiment_analysis", arguments={"text": "I am happy."}, ...)]

                assert isinstance(response1.content, list)
                response2 = await model_client.create(
                    messages=[
                        SystemMessage(content="Analyze input text sentiment using the tool provided."),
                        UserMessage(content="I am happy.", source="user"),
                        AssistantMessage(content=response1.content, source="assistant"),
                        FunctionExecutionResultMessage(
                            content=[
                                FunctionExecutionResult(
                                    content="happy",
                                    call_id=response1.content[0].id,
                                    is_error=False,
                                    name="sentiment_analysis",
                                )
                            ]
                        ),
                    ],
                    # Use the structured output format.
                    json_output=AgentResponse,
                )
                print(response2.content)
                # Should be a structured output.
                # {"thoughts": "The user is happy.", "response": "happy"}

                # Close the client when done.
                await model_client.close()


            asyncio.run(main())

        To load the client from a configuration, you can use the `load_component` method:

        .. code-block:: python

            from autogen_core.models import ChatCompletionClient

            config = {
                "provider": "OpenAIChatCompletionClient",
                "config": {"model": "gpt-4o", "api_key": "REPLACE_WITH_YOUR_API_KEY"},
            }

            client = ChatCompletionClient.load_component(config)

        To view the full list of available configuration options, see the :py:class:`OpenAIClientConfigurationConfigModel` class.

    """

    component_type = "model"
    component_config_schema = OpenAIClientConfigurationConfigModel
    component_provider_override = "autogen_ext.models.openai.OpenAIChatCompletionClient"

    def __init__(self, **kwargs: Unpack[OpenAIClientConfiguration]):
        if "model" not in kwargs:
            raise ValueError("model is required for OpenAIChatCompletionClient")

        model_capabilities: Optional[ModelCapabilities] = None  # type: ignore
        self._raw_config: Dict[str, Any] = dict(kwargs).copy()
        copied_args = dict(kwargs).copy()
        if "model_capabilities" in kwargs:
            model_capabilities = kwargs["model_capabilities"]
            del copied_args["model_capabilities"]

        model_info: Optional[ModelInfo] = None
        if "model_info" in kwargs:
            model_info = kwargs["model_info"]
            del copied_args["model_info"]

        add_name_prefixes: bool = False
        if "add_name_prefixes" in kwargs:
            add_name_prefixes = kwargs["add_name_prefixes"]

        # Special handling for Gemini models.
        assert "model" in copied_args and isinstance(copied_args["model"], str)
        if copied_args["model"].startswith("gemini-"):
            if "base_url" not in copied_args:
                copied_args["base_url"] = _model_info.GEMINI_OPENAI_BASE_URL
            if "api_key" not in copied_args and "GEMINI_API_KEY" in os.environ:
                copied_args["api_key"] = os.environ["GEMINI_API_KEY"]
        if copied_args["model"].startswith("claude-"):
            if "base_url" not in copied_args:
                copied_args["base_url"] = _model_info.ANTHROPIC_OPENAI_BASE_URL
            if "api_key" not in copied_args and "ANTHROPIC_API_KEY" in os.environ:
                copied_args["api_key"] = os.environ["ANTHROPIC_API_KEY"]

        client = _openai_client_from_config(copied_args)
        create_args = _create_args_from_config(copied_args)

        super().__init__(
            client=client,
            create_args=create_args,
            model_capabilities=model_capabilities,
            model_info=model_info,
            add_name_prefixes=add_name_prefixes,
        )

    def __getstate__(self) -> Dict[str, Any]:
        state = self.__dict__.copy()
        state["_client"] = None
        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        self.__dict__.update(state)
        self._client = _openai_client_from_config(state["_raw_config"])
    @classmethod
    def _from_config(cls, config: OpenAIClientConfigurationConfigModel) -> Self:
        copied_config = config.model_copy().model_dump(exclude_none=True)

        # Handle api_key as SecretStr
        if "api_key" in copied_config and isinstance(config.api_key, SecretStr):
            copied_config["api_key"] = config.api_key.get_secret_value()

        return cls(**copied_config)
class AzureOpenAIChatCompletionClient(
    BaseOpenAIChatCompletionClient, Component[AzureOpenAIClientConfigurationConfigModel]
):
    """Chat completion client for Azure OpenAI hosted models.

    To use this client, you must install the `azure` and `openai` extensions:

    .. code-block:: bash

        pip install "autogen-ext[openai,azure]"

    Args:
        model (str): Which OpenAI model to use.
        azure_endpoint (str): The endpoint for the Azure model. **Required for Azure models.**
        azure_deployment (str): Deployment name for the Azure model. **Required for Azure models.**
        api_version (str): The API version to use. **Required for Azure models.**
        azure_ad_token (str): The Azure AD token to use. Provide this or `azure_ad_token_provider` for token-based authentication.
        azure_ad_token_provider (optional, Callable[[], Awaitable[str]] | AzureTokenProvider): The Azure AD token provider to use. Provide this or `azure_ad_token` for token-based authentication.
        api_key (optional, str): The API key to use. Use this if you are using key-based authentication. It is optional if you are using Azure AD token-based authentication or the `AZURE_OPENAI_API_KEY` environment variable.
        timeout (optional, float): The timeout for the request in seconds.
        max_retries (optional, int): The maximum number of retries to attempt.
        model_info (optional, ModelInfo): The capabilities of the model. **Required if the model name is not a valid OpenAI model.**
        frequency_penalty (optional, float):
        logit_bias (optional, dict[str, int]):
        max_tokens (optional, int):
        n (optional, int):
        presence_penalty (optional, float):
        response_format (optional, Dict[str, Any]): the format of the response. Possible options are:

            .. code-block:: text

                # Text response, this is the default.
                {"type": "text"}

            .. code-block:: text

                # JSON response, make sure to instruct the model to return JSON.
                {"type": "json_object"}

            .. code-block:: text

                # Structured output response, with a pre-defined JSON schema.
                {
                    "type": "json_schema",
                    "json_schema": {
                        "name": "name of the schema, must be an identifier.",
                        "description": "description for the model.",
                        # You can convert a Pydantic (v2) model to JSON schema
                        # using the `model_json_schema()` method.
                        "schema": "<the JSON schema itself>",
                        # Whether to enable strict schema adherence when
                        # generating the output. If set to true, the model will
                        # always follow the exact schema defined in the
                        # `schema` field. Only a subset of JSON Schema is
                        # supported when `strict` is `true`.
                        # To learn more, read
                        # https://platform.openai.com/docs/guides/structured-outputs.
                        "strict": False,  # or True
                    },
                }

            It is recommended to use the `json_output` parameter in
            :meth:`~autogen_ext.models.openai.BaseOpenAIChatCompletionClient.create` or
            :meth:`~autogen_ext.models.openai.BaseOpenAIChatCompletionClient.create_stream` methods instead of `response_format`
            for structured output. The `json_output` parameter is more flexible and allows you to specify a Pydantic model class directly.

        seed (optional, int):
        stop (optional, str | List[str]):
        temperature (optional, float):
        top_p (optional, float):
        user (optional, str):
        default_headers (optional, dict[str, str]): Custom headers; useful for authentication or other custom requirements.

    To use the client, you need to provide your deployment name, Azure Cognitive Services endpoint, and API version.
    For authentication, you can either provide an API key or an Azure Active Directory (AAD) token credential.

    The following code snippet shows how to use AAD authentication.
    The identity used must be assigned the `Cognitive Services OpenAI User <https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/role-based-access-control#cognitive-services-openai-user>`_ role.

    .. code-block:: python

        from autogen_ext.auth.azure import AzureTokenProvider
        from autogen_ext.models.openai import AzureOpenAIChatCompletionClient
        from azure.identity import DefaultAzureCredential

        # Create the token provider
        token_provider = AzureTokenProvider(
            DefaultAzureCredential(),
            "https://cognitiveservices.azure.com/.default",
        )

        az_model_client = AzureOpenAIChatCompletionClient(
            azure_deployment="{your-azure-deployment}",
            model="{model-name, such as gpt-4o}",
            api_version="2024-06-01",
            azure_endpoint="https://{your-custom-endpoint}.openai.azure.com/",
            azure_ad_token_provider=token_provider,  # Optional if you choose key-based authentication.
            # api_key="sk-...", # For key-based authentication.
        )

    See other usage examples in the :class:`OpenAIChatCompletionClient` class.

    To load the client that uses identity-based auth from a configuration, you can use the `load_component` method:

    .. code-block:: python

        from autogen_core.models import ChatCompletionClient

        config = {
            "provider": "AzureOpenAIChatCompletionClient",
            "config": {
                "model": "gpt-4o-2024-05-13",
                "azure_endpoint": "https://{your-custom-endpoint}.openai.azure.com/",
                "azure_deployment": "{your-azure-deployment}",
                "api_version": "2024-06-01",
                "azure_ad_token_provider": {
                    "provider": "autogen_ext.auth.azure.AzureTokenProvider",
                    "config": {
                        "provider_kind": "DefaultAzureCredential",
                        "scopes": ["https://cognitiveservices.azure.com/.default"],
                    },
                },
            },
        }

        client = ChatCompletionClient.load_component(config)

    To view the full list of available configuration options, see the :py:class:`AzureOpenAIClientConfigurationConfigModel` class.

    .. note::

        Right now only `DefaultAzureCredential` is supported, with no additional args passed to it.

    .. note::

        The Azure OpenAI client by default sets the User-Agent header to `autogen-python/{version}`.
        To override this, you can set the `autogen_ext.models.openai.AZURE_OPENAI_USER_AGENT` variable to an empty string.

    See `here <https://learn.microsoft.com/en-us/azure/ai-services/openai/how-to/managed-identity#chat-completions>`_ for how to use the Azure client directly or for more info.

    """

    component_type = "model"
    component_config_schema = AzureOpenAIClientConfigurationConfigModel
    component_provider_override = "autogen_ext.models.openai.AzureOpenAIChatCompletionClient"

    def __init__(self, **kwargs: Unpack[AzureOpenAIClientConfiguration]):
        model_capabilities: Optional[ModelCapabilities] = None  # type: ignore
        copied_args = dict(kwargs).copy()
        if "model_capabilities" in kwargs:
            model_capabilities = kwargs["model_capabilities"]
            del copied_args["model_capabilities"]

        model_info: Optional[ModelInfo] = None
        if "model_info" in kwargs:
            model_info = kwargs["model_info"]
            del copied_args["model_info"]

        add_name_prefixes: bool = False
        if "add_name_prefixes" in kwargs:
            add_name_prefixes = kwargs["add_name_prefixes"]

        client = _azure_openai_client_from_config(copied_args)
        create_args = _create_args_from_config(copied_args)
        self._raw_config: Dict[str, Any] = copied_args
        super().__init__(
            client=client,
            create_args=create_args,
            model_capabilities=model_capabilities,
            model_info=model_info,
            add_name_prefixes=add_name_prefixes,
        )

    def __getstate__(self) -> Dict[str, Any]:
        state = self.__dict__.copy()
        state["_client"] = None
        return state

    def __setstate__(self, state: Dict[str, Any]) -> None:
        self.__dict__.update(state)
        self._client = _azure_openai_client_from_config(state["_raw_config"])
    def _to_config(self) -> AzureOpenAIClientConfigurationConfigModel:
        from ...auth.azure import AzureTokenProvider

        copied_config = self._raw_config.copy()
        if "azure_ad_token_provider" in copied_config:
            if not isinstance(copied_config["azure_ad_token_provider"], AzureTokenProvider):
                raise ValueError("azure_ad_token_provider must be a AzureTokenProvider to be component serialized")
            copied_config["azure_ad_token_provider"] = (
                copied_config["azure_ad_token_provider"].dump_component().model_dump(exclude_none=True)
            )
        return AzureOpenAIClientConfigurationConfigModel(**copied_config)
    @classmethod
    def _from_config(cls, config: AzureOpenAIClientConfigurationConfigModel) -> Self:
        from ...auth.azure import AzureTokenProvider

        copied_config = config.model_copy().model_dump(exclude_none=True)

        # Handle api_key as SecretStr
        if "api_key" in copied_config and isinstance(config.api_key, SecretStr):
            copied_config["api_key"] = config.api_key.get_secret_value()

        if "azure_ad_token_provider" in copied_config:
            copied_config["azure_ad_token_provider"] = AzureTokenProvider.load_component(
                copied_config["azure_ad_token_provider"]
            )

        return cls(**copied_config)