Source code for autogen_ext.models.ollama._ollama_client
import asyncio
import inspect
import json
import logging
import math
import re
import warnings
from dataclasses import dataclass
from typing import (
    Any,
    AsyncGenerator,
    Dict,
    List,
    Literal,
    Mapping,
    Optional,
    Sequence,
    Union,
    cast,
)

import tiktoken
from autogen_core import (
    EVENT_LOGGER_NAME,
    TRACE_LOGGER_NAME,
    CancellationToken,
    Component,
    FunctionCall,
    Image,
)
from autogen_core.logging import LLMCallEvent, LLMStreamEndEvent, LLMStreamStartEvent
from autogen_core.models import (
    AssistantMessage,
    ChatCompletionClient,
    CreateResult,
    FinishReasons,
    FunctionExecutionResultMessage,
    LLMMessage,
    ModelCapabilities,  # type: ignore
    ModelFamily,
    ModelInfo,
    RequestUsage,
    SystemMessage,
    UserMessage,
)
from autogen_core.tools import Tool, ToolSchema
from ollama import AsyncClient, ChatResponse, Message
from ollama import Image as OllamaImage
from ollama import Tool as OllamaTool
from ollama._types import ChatRequest
from pydantic import BaseModel
from pydantic.json_schema import JsonSchemaValue
from typing_extensions import Self, Unpack

from . import _model_info
from .config import BaseOllamaClientConfiguration, BaseOllamaClientConfigurationConfigModel

logger = logging.getLogger(EVENT_LOGGER_NAME)
trace_logger = logging.getLogger(TRACE_LOGGER_NAME)

# TODO: support more kwargs. Can't automate the list like we can with openai or azure because ollama uses an untyped kwargs blob for initialization.
ollama_init_kwargs = set(["host"])

# TODO: add kwarg checking logic later
# create_kwargs = set(completion_create_params.CompletionCreateParamsBase.__annotations__.keys()) | set(
#     ("timeout", "stream")
# )
# # Only single choice allowed
# disallowed_create_args = set(["stream", "messages", "function_call", "functions", "n"])
# required_create_args: Set[str] = set(["model"])


def _ollama_client_from_config(config: Mapping[str, Any]) -> AsyncClient:
    # Take a copy
    copied_config = dict(config).copy()
    # Shave down the config to just the Ollama AsyncClient kwargs
    ollama_config = {k: v for k, v in copied_config.items() if k in ollama_init_kwargs}
    return AsyncClient(**ollama_config)


ollama_chat_request_fields: dict[str, Any] = [m for m in inspect.getmembers(ChatRequest) if m[0] == "model_fields"][0][1]
OLLAMA_VALID_CREATE_KWARGS_KEYS = set(ollama_chat_request_fields.keys()) | set(
    ("model", "messages", "tools", "stream", "format", "options", "keep_alive", "response_format")
)
# NOTE: "response_format" is a special case that we handle for backwards compatibility.
# It is going to be deprecated in the future.


def _create_args_from_config(config: Mapping[str, Any]) -> Dict[str, Any]:
    if "response_format" in config:
        warnings.warn(
            "Using response_format will be deprecated. Use json_output instead.",
            DeprecationWarning,
            stacklevel=2,
        )

    create_args = {k.lower(): v for k, v in config.items() if k.lower() in OLLAMA_VALID_CREATE_KWARGS_KEYS}
    dropped_keys = [k for k in config.keys() if k.lower() not in OLLAMA_VALID_CREATE_KWARGS_KEYS]
    trace_logger.info(f"Dropped the following unrecognized keys from create_args: {dropped_keys}")

    return create_args
    # create_args = {k: v for k, v in config.items() if k in create_kwargs}
    # create_args_keys = set(create_args.keys())
    # if not required_create_args.issubset(create_args_keys):
    #     raise ValueError(f"Required create args are missing: {required_create_args - create_args_keys}")
    # if disallowed_create_args.intersection(create_args_keys):
    #     raise ValueError(f"Disallowed create args are present: {disallowed_create_args.intersection(create_args_keys)}")
    # return create_args


# TODO check types
# oai_system_message_schema = type2schema(ChatCompletionSystemMessageParam)
# oai_user_message_schema = type2schema(ChatCompletionUserMessageParam)
# oai_assistant_message_schema = type2schema(ChatCompletionAssistantMessageParam)
# oai_tool_message_schema = type2schema(ChatCompletionToolMessageParam)


def type_to_role(message: LLMMessage) -> str:  # return type: Message.role
    if isinstance(message, SystemMessage):
        return "system"
    elif isinstance(message, UserMessage):
        return "user"
    elif isinstance(message, AssistantMessage):
        return "assistant"
    else:
        return "tool"


def user_message_to_ollama(message: UserMessage) -> Sequence[Message]:
    assert_valid_name(message.source)
    if isinstance(message.content, str):
        return [
            Message(
                content=message.content,
                role="user",
                # name=message.source, # TODO: No name parameter in Ollama
            )
        ]
    else:
        ollama_messages: List[Message] = []
        for part in message.content:
            if isinstance(part, str):
                ollama_messages.append(Message(content=part, role="user"))
            elif isinstance(part, Image):
                # TODO: should images go into their own message? Should each image get its own message?
                if not ollama_messages:
                    ollama_messages.append(Message(role="user", images=[OllamaImage(value=part.to_base64())]))
                else:
                    if ollama_messages[-1].images is None:
                        ollama_messages[-1].images = [OllamaImage(value=part.to_base64())]
                    else:
                        ollama_messages[-1].images.append(OllamaImage(value=part.to_base64()))  # type: ignore
            else:
                raise ValueError(f"Unknown content type: {part}")
        return ollama_messages


def system_message_to_ollama(message: SystemMessage) -> Message:
    return Message(
        content=message.content,
        role="system",
    )


def _func_args_to_ollama_args(args: str) -> Dict[str, Any]:
    return json.loads(args)  # type: ignore


def func_call_to_ollama(message: FunctionCall) -> Message.ToolCall:
    return Message.ToolCall(
        function=Message.ToolCall.Function(
            name=message.name,
            arguments=_func_args_to_ollama_args(message.arguments),
        )
    )


def tool_message_to_ollama(
    message: FunctionExecutionResultMessage,
) -> Sequence[Message]:
    return [Message(content=x.content, role="tool") for x in message.content]


def assistant_message_to_ollama(
    message: AssistantMessage,
) -> Message:
    assert_valid_name(message.source)
    if isinstance(message.content, list):
        return Message(
            tool_calls=[func_call_to_ollama(x) for x in message.content],
            role="assistant",
            # name=message.source,
        )
    else:
        return Message(
            content=message.content,
            role="assistant",
        )


def to_ollama_type(message: LLMMessage) -> Sequence[Message]:
    if isinstance(message, SystemMessage):
        return [system_message_to_ollama(message)]
    elif isinstance(message, UserMessage):
        return user_message_to_ollama(message)
    elif isinstance(message, AssistantMessage):
        return [assistant_message_to_ollama(message)]
    else:
        return tool_message_to_ollama(message)


# TODO: Is this correct? Do we need this?
def calculate_vision_tokens(image: Image, detail: str = "auto") -> int:
    MAX_LONG_EDGE = 2048
    BASE_TOKEN_COUNT = 85
    TOKENS_PER_TILE = 170
    MAX_SHORT_EDGE = 768
    TILE_SIZE = 512

    if detail == "low":
        return BASE_TOKEN_COUNT

    width, height = image.image.size

    # Scale down to fit within a MAX_LONG_EDGE x MAX_LONG_EDGE square if necessary
    if width > MAX_LONG_EDGE or height > MAX_LONG_EDGE:
        aspect_ratio = width / height
        if aspect_ratio > 1:
            # Width is greater than height
            width = MAX_LONG_EDGE
            height = int(MAX_LONG_EDGE / aspect_ratio)
        else:
            # Height is greater than or equal to width
            height = MAX_LONG_EDGE
            width = int(MAX_LONG_EDGE * aspect_ratio)

    # Resize such that the shortest side is MAX_SHORT_EDGE if both dimensions exceed MAX_SHORT_EDGE
    aspect_ratio = width / height
    if width > MAX_SHORT_EDGE and height > MAX_SHORT_EDGE:
        if aspect_ratio > 1:
            # Width is greater than height
            height = MAX_SHORT_EDGE
            width = int(MAX_SHORT_EDGE * aspect_ratio)
        else:
            # Height is greater than or equal to width
            width = MAX_SHORT_EDGE
            height = int(MAX_SHORT_EDGE / aspect_ratio)

    # Calculate the number of tiles based on TILE_SIZE
    tiles_width = math.ceil(width / TILE_SIZE)
    tiles_height = math.ceil(height / TILE_SIZE)
    total_tiles = tiles_width * tiles_height
    # Calculate the total tokens based on the number of tiles and the base token count
    total_tokens = BASE_TOKEN_COUNT + TOKENS_PER_TILE * total_tiles
    return total_tokens


def _add_usage(usage1: RequestUsage, usage2: RequestUsage) -> RequestUsage:
    return RequestUsage(
        prompt_tokens=usage1.prompt_tokens + usage2.prompt_tokens,
        completion_tokens=usage1.completion_tokens + usage2.completion_tokens,
    )


# Ollama's tools follow a stricter protocol than OAI or us. While OAI accepts a map of [str, Any], Ollama requires a map of [str, Property] where Property is a typed object containing a type and description. Therefore, only the keys "type" and "description" will be converted from the properties blob in the tool schema
def convert_tools(
    tools: Sequence[Tool | ToolSchema],
) -> List[OllamaTool]:
    result: List[OllamaTool] = []
    for tool in tools:
        if isinstance(tool, Tool):
            tool_schema = tool.schema
        else:
            assert isinstance(tool, dict)
            tool_schema = tool
        parameters = tool_schema["parameters"] if "parameters" in tool_schema else None
        ollama_properties: Mapping[str, OllamaTool.Function.Parameters.Property] | None = None
        if parameters is not None:
            ollama_properties = {}
            for prop_name, prop_schema in parameters["properties"].items():
                ollama_properties[prop_name] = OllamaTool.Function.Parameters.Property(
                    type=prop_schema["type"],
                    description=prop_schema["description"] if "description" in prop_schema else None,
                )
        result.append(
            OllamaTool(
                function=OllamaTool.Function(
                    name=tool_schema["name"],
                    description=tool_schema["description"] if "description" in tool_schema else "",
                    parameters=OllamaTool.Function.Parameters(
                        required=parameters["required"]
                        if parameters is not None and "required" in parameters
                        else None,
                        properties=ollama_properties,
                    ),
                ),
            )
        )
    # Check if all tools have valid names.
    for tool_param in result:
        assert_valid_name(tool_param["function"]["name"])
    return result


def normalize_name(name: str) -> str:
    """
    LLMs sometimes ask for functions while ignoring their own format requirements; this function should be used to replace invalid characters with "_".

    Prefer _assert_valid_name for validating user configuration or input
    """
    return re.sub(r"[^a-zA-Z0-9_-]", "_", name)[:64]


def assert_valid_name(name: str) -> str:
    """
    Ensure that configured names are valid, raises ValueError if not.

    For munging LLM responses use _normalize_name to ensure LLM specified names don't break the API.
    """
    if not re.match(r"^[a-zA-Z0-9_-]+$", name):
        raise ValueError(f"Invalid name: {name}. Only letters, numbers, '_' and '-' are allowed.")
    if len(name) > 64:
        raise ValueError(f"Invalid name: {name}. Name must be 64 characters or fewer.")
    return name


# TODO: Does this need to change?
def normalize_stop_reason(stop_reason: str | None) -> FinishReasons:
    if stop_reason is None:
        return "unknown"

    # Convert to lower case
    stop_reason = stop_reason.lower()

    KNOWN_STOP_MAPPINGS: Dict[str, FinishReasons] = {
        "stop": "stop",
        "end_turn": "stop",
        "tool_calls": "function_calls",
    }

    return KNOWN_STOP_MAPPINGS.get(stop_reason, "unknown")


@dataclass
class CreateParams:
    messages: Sequence[Message]
    tools: Sequence[OllamaTool]
    format: Optional[Union[Literal["", "json"], JsonSchemaValue]]
    create_args: Dict[str, Any]


class BaseOllamaChatCompletionClient(ChatCompletionClient):
    def __init__(
        self,
        client: AsyncClient,
        *,
        create_args: Dict[str, Any],
        model_capabilities: Optional[ModelCapabilities] = None,  # type: ignore
        model_info: Optional[ModelInfo] = None,
    ):
        self._client = client
        self._model_name = create_args["model"]
        if model_capabilities is None and model_info is None:
            try:
                self._model_info = _model_info.get_info(create_args["model"])
            except KeyError as err:
                raise ValueError("model_info is required when model name is not a valid Ollama model") from err
        elif model_capabilities is not None and model_info is not None:
            raise ValueError("model_capabilities and model_info are mutually exclusive")
        elif model_capabilities is not None and model_info is None:
            warnings.warn("model_capabilities is deprecated, use model_info instead", DeprecationWarning, stacklevel=2)
            info = cast(ModelInfo, model_capabilities)
            info["family"] = ModelFamily.UNKNOWN
            self._model_info = info
        elif model_capabilities is None and model_info is not None:
            self._model_info = model_info

        self._resolved_model: Optional[str] = None
        self._model_class: Optional[str] = None
        if "model" in create_args:
            self._resolved_model = create_args["model"]
            self._model_class = _model_info.resolve_model_class(create_args["model"])

        if (
            not self._model_info["json_output"]
            and "response_format" in create_args
            and (
                isinstance(create_args["response_format"], dict)
                and create_args["response_format"]["type"] == "json_object"
            )
        ):
            raise ValueError("Model does not support JSON output.")

        self._create_args = create_args
        self._total_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
        self._actual_usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
        # Ollama doesn't have IDs for tools, so we just increment a counter
        self._tool_id = 0

    @classmethod
    def create_from_config(cls, config: Dict[str, Any]) -> ChatCompletionClient:
        return OllamaChatCompletionClient(**config)

    def get_create_args(self) -> Mapping[str, Any]:
        return self._create_args

    def _process_create_args(
        self,
        messages: Sequence[LLMMessage],
        tools: Sequence[Tool | ToolSchema],
        json_output: Optional[bool | type[BaseModel]],
        extra_create_args: Mapping[str, Any],
    ) -> CreateParams:
        # Copy the create args and overwrite anything in extra_create_args
        create_args = self._create_args.copy()
        create_args.update(extra_create_args)
        create_args = _create_args_from_config(create_args)

        response_format_value: JsonSchemaValue | Literal["json"] | None = None

        if "response_format" in create_args:
            warnings.warn(
                "Using response_format will be deprecated. Use json_output instead.",
                DeprecationWarning,
                stacklevel=2,
            )
            value = create_args["response_format"]
            if isinstance(value, type) and issubclass(value, BaseModel):
                response_format_value = value.model_json_schema()
                # Remove response_format from create_args to prevent passing it twice.
                del create_args["response_format"]
            else:
                raise ValueError(f"response_format must be a Pydantic model class, not {type(value)}")

        if json_output is not None:
            if self.model_info["json_output"] is False and json_output is True:
                raise ValueError("Model does not support JSON output.")
            if json_output is True:
                # JSON mode.
                response_format_value = "json"
            elif json_output is False:
                # Text mode.
                response_format_value = None
            elif isinstance(json_output, type) and issubclass(json_output, BaseModel):
                if response_format_value is not None:
                    raise ValueError(
                        "response_format and json_output cannot be set to a Pydantic model class at the same time. "
                        "Use json_output instead."
                    )
                # Beta client mode with Pydantic model class.
                response_format_value = json_output.model_json_schema()
            else:
                raise ValueError(f"json_output must be a boolean or a Pydantic model class, got {type(json_output)}")

        if "format" in create_args:
            # Handle the case where format is set from create_args.
            if json_output is not None:
                raise ValueError("json_output and format cannot be set at the same time. Use json_output instead.")
            assert response_format_value is None
            response_format_value = create_args["format"]
            # Remove format from create_args to prevent passing it twice.
            del create_args["format"]

        # TODO: allow custom handling.
        # For now we raise an error if images are present and vision is not supported
        if self.model_info["vision"] is False:
            for message in messages:
                if isinstance(message, UserMessage):
                    if isinstance(message.content, list) and any(isinstance(x, Image) for x in message.content):
                        raise ValueError("Model does not support vision and image was provided")

        if self.model_info["json_output"] is False and json_output is True:
            raise ValueError("Model does not support JSON output.")

        ollama_messages_nested = [to_ollama_type(m) for m in messages]
        ollama_messages = [item for sublist in ollama_messages_nested for item in sublist]

        if self.model_info["function_calling"] is False and len(tools) > 0:
            raise ValueError("Model does not support function calling and tools were provided")

        converted_tools = convert_tools(tools)
        return CreateParams(
            messages=ollama_messages,
            tools=converted_tools,
            format=response_format_value,
            create_args=create_args,
        )

    async def create(
        self,
        messages: Sequence[LLMMessage],
        *,
        tools: Sequence[Tool | ToolSchema] = [],
        json_output: Optional[bool | type[BaseModel]] = None,
        extra_create_args: Mapping[str, Any] = {},
        cancellation_token: Optional[CancellationToken] = None,
    ) -> CreateResult:
        # Make sure all extra_create_args are valid
        # TODO: kwarg checking logic
        # extra_create_args_keys = set(extra_create_args.keys())
        # if not create_kwargs.issuperset(extra_create_args_keys):
        #     raise ValueError(f"Extra create args are invalid: {extra_create_args_keys - create_kwargs}")

        create_params = self._process_create_args(
            messages,
            tools,
            json_output,
            extra_create_args,
        )

        future = asyncio.ensure_future(
            self._client.chat(  # type: ignore
                # model=self._model_name,
                messages=create_params.messages,
                tools=create_params.tools if len(create_params.tools) > 0 else None,
                stream=False,
                format=create_params.format,
                **create_params.create_args,
            )
        )
        if cancellation_token is not None:
            cancellation_token.link_future(future)
        result: ChatResponse = await future

        usage = RequestUsage(
            # TODO backup token counting
            prompt_tokens=result.prompt_eval_count if result.prompt_eval_count is not None else 0,
            completion_tokens=(result.eval_count if result.eval_count is not None else 0),
        )

        logger.info(
            LLMCallEvent(
                messages=[m.model_dump() for m in create_params.messages],
                response=result.model_dump(),
                prompt_tokens=usage.prompt_tokens,
                completion_tokens=usage.completion_tokens,
            )
        )

        if self._resolved_model is not None:
            if self._resolved_model != result.model:
                warnings.warn(
                    f"Resolved model mismatch: {self._resolved_model} != {result.model}. "
                    "Model mapping in autogen_ext.models.ollama may be incorrect.",
                    stacklevel=2,
                )

        # Detect whether it is a function call or not.
        # We don't rely on choice.finish_reason as it is not always accurate, depending on the API used.
        content: Union[str, List[FunctionCall]]
        if result.message.tool_calls is not None:
            # TODO: What are possible values for done_reason?
            if result.done_reason != "tool_calls":
                warnings.warn(
                    f"Finish reason mismatch: {result.done_reason} != tool_calls "
                    "when tool_calls are present. Finish reason may not be accurate. "
                    "This may be due to the API used that is not returning the correct finish reason.",
                    stacklevel=2,
                )
            # TODO: Is this still an error condition?
            if result.message.content is not None and result.message.content != "":
                warnings.warn(
                    "Both tool_calls and content are present in the message. "
                    "This is unexpected. content will be ignored, tool_calls will be used.",
                    stacklevel=2,
                )
            # NOTE: If OAI response type changes, this will need to be updated
            content = [
                FunctionCall(
                    id=str(self._tool_id),
                    arguments=json.dumps(x.function.arguments),
                    name=normalize_name(x.function.name),
                )
                for x in result.message.tool_calls
            ]
            finish_reason = "tool_calls"
            self._tool_id += 1
        else:
            finish_reason = result.done_reason or ""
            content = result.message.content or ""

        # Ollama currently doesn't provide these.
        # Currently open ticket: https://github.com/ollama/ollama/issues/2415
        # logprobs: Optional[List[ChatCompletionTokenLogprob]] = None
        # if choice.logprobs and choice.logprobs.content:
        #     logprobs = [
        #         ChatCompletionTokenLogprob(
        #             token=x.token,
        #             logprob=x.logprob,
        #             top_logprobs=[TopLogprob(logprob=y.logprob, bytes=y.bytes) for y in x.top_logprobs],
        #             bytes=x.bytes,
        #         )
        #         for x in choice.logprobs.content
        #     ]

        response = CreateResult(
            finish_reason=normalize_stop_reason(finish_reason),
            content=content,
            usage=usage,
            cached=False,
            logprobs=None,
        )

        self._total_usage = _add_usage(self._total_usage, usage)
        self._actual_usage = _add_usage(self._actual_usage, usage)

        return response

    async def create_stream(
        self,
        messages: Sequence[LLMMessage],
        *,
        tools: Sequence[Tool | ToolSchema] = [],
        json_output: Optional[bool | type[BaseModel]] = None,
        extra_create_args: Mapping[str, Any] = {},
        cancellation_token: Optional[CancellationToken] = None,
    ) -> AsyncGenerator[Union[str, CreateResult], None]:
        # Make sure all extra_create_args are valid
        # TODO: kwarg checking logic
        # extra_create_args_keys = set(extra_create_args.keys())
        # if not create_kwargs.issuperset(extra_create_args_keys):
        #     raise ValueError(f"Extra create args are invalid: {extra_create_args_keys - create_kwargs}")

        create_params = self._process_create_args(
            messages,
            tools,
            json_output,
            extra_create_args,
        )

        stream_future = asyncio.ensure_future(
            self._client.chat(  # type: ignore
                # model=self._model_name,
                messages=create_params.messages,
                tools=create_params.tools if len(create_params.tools) > 0 else None,
                stream=True,
                format=create_params.format,
                **create_params.create_args,
            )
        )
        if cancellation_token is not None:
            cancellation_token.link_future(stream_future)
        stream = await stream_future

        chunk = None
        stop_reason = None
        content_chunks: List[str] = []
        full_tool_calls: List[FunctionCall] = []
        completion_tokens = 0
        first_chunk = True
        while True:
            try:
                chunk_future = asyncio.ensure_future(anext(stream))
                if cancellation_token is not None:
                    cancellation_token.link_future(chunk_future)
                chunk = await chunk_future

                if first_chunk:
                    first_chunk = False
                    # Emit the start event.
                    logger.info(
                        LLMStreamStartEvent(
                            messages=[m.model_dump() for m in create_params.messages],
                        )
                    )
                # set the stop_reason for the usage chunk to the prior stop_reason
                stop_reason = chunk.done_reason if chunk.done and stop_reason is None else stop_reason
                # First try get content
                if chunk.message.content is not None:
                    content_chunks.append(chunk.message.content)
                    if len(chunk.message.content) > 0:
                        yield chunk.message.content
                    continue

                # Otherwise, get tool calls
                if chunk.message.tool_calls is not None:
                    full_tool_calls.extend(
                        [
                            FunctionCall(
                                id=str(self._tool_id),
                                arguments=json.dumps(x.function.arguments),
                                name=normalize_name(x.function.name),
                            )
                            for x in chunk.message.tool_calls
                        ]
                    )

                # TODO: logprobs currently unsupported in ollama.
                # See: https://github.com/ollama/ollama/issues/2415
                # if choice.logprobs and choice.logprobs.content:
                #     logprobs = [
                #         ChatCompletionTokenLogprob(
                #             token=x.token,
                #             logprob=x.logprob,
                #             top_logprobs=[TopLogprob(logprob=y.logprob, bytes=y.bytes) for y in x.top_logprobs],
                #             bytes=x.bytes,
                #         )
                #         for x in choice.logprobs.content
                #     ]

            except StopAsyncIteration:
                break

        if chunk and chunk.prompt_eval_count:
            prompt_tokens = chunk.prompt_eval_count
        else:
            prompt_tokens = 0

        if stop_reason == "function_call":
            raise ValueError("Function calls are not supported in this context")

        content: Union[str, List[FunctionCall]]
        if len(content_chunks) > 1:
            content = "".join(content_chunks)
            if chunk and chunk.eval_count:
                completion_tokens = chunk.eval_count
            else:
                completion_tokens = 0
        else:
            completion_tokens = 0
            # TODO: fix assumption that dict values were added in order and actually order by int index
            # for tool_call in full_tool_calls.values():
            #     # value = json.dumps(tool_call)
            #     # completion_tokens += count_token(value, model=model)
            #     completion_tokens += 0
            content = full_tool_calls

        usage = RequestUsage(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )

        result = CreateResult(
            finish_reason=normalize_stop_reason(stop_reason),
            content=content,
            usage=usage,
            cached=False,
            logprobs=None,
        )

        # Emit the end event.
        logger.info(
            LLMStreamEndEvent(
                response=result.model_dump(),
                prompt_tokens=usage.prompt_tokens,
                completion_tokens=usage.completion_tokens,
            )
        )

        self._total_usage = _add_usage(self._total_usage, usage)
        self._actual_usage = _add_usage(self._actual_usage, usage)

        yield result

    async def close(self) -> None:
        pass  # ollama has no close method?

    def actual_usage(self) -> RequestUsage:
        return self._actual_usage

    def total_usage(self) -> RequestUsage:
        return self._total_usage

    # TODO: probably needs work
    def count_tokens(self, messages: Sequence[LLMMessage], *, tools: Sequence[Tool | ToolSchema] = []) -> int:
        model = self._create_args["model"]
        try:
            encoding = tiktoken.encoding_for_model(model)
        except KeyError:
            trace_logger.warning(f"Model {model} not found. Using cl100k_base encoding.")
            encoding = tiktoken.get_encoding("cl100k_base")
        tokens_per_message = 3
        num_tokens = 0

        # Message tokens.
        for message in messages:
            num_tokens += tokens_per_message
            ollama_message = to_ollama_type(message)
            for ollama_message_part in ollama_message:
                if isinstance(message.content, Image):
                    num_tokens += calculate_vision_tokens(message.content)
                elif ollama_message_part.content is not None:
                    num_tokens += len(encoding.encode(ollama_message_part.content))
        # TODO: every model family has its own message sequence.
        num_tokens += 3  # every reply is primed with <|start|>assistant<|message|>

        # Tool tokens.
        ollama_tools = convert_tools(tools)
        for tool in ollama_tools:
            function = tool["function"]
            tool_tokens = len(encoding.encode(function["name"]))
            if "description" in function:
                tool_tokens += len(encoding.encode(function["description"]))
            tool_tokens -= 2
            if "parameters" in function:
                parameters = function["parameters"]
                if "properties" in parameters:
                    assert isinstance(parameters["properties"], dict)
                    for propertiesKey in parameters["properties"]:  # pyright: ignore
                        assert isinstance(propertiesKey, str)
                        tool_tokens += len(encoding.encode(propertiesKey))
                        v = parameters["properties"][propertiesKey]  # pyright: ignore
                        for field in v:  # pyright: ignore
                            if field == "type":
                                tool_tokens += 2
                                tool_tokens += len(encoding.encode(v["type"]))  # pyright: ignore
                            elif field == "description":
                                tool_tokens += 2
                                tool_tokens += len(encoding.encode(v["description"]))  # pyright: ignore
                            elif field == "enum":
                                tool_tokens -= 3
                                for o in v["enum"]:  # pyright: ignore
                                    tool_tokens += 3
                                    tool_tokens += len(encoding.encode(o))  # pyright: ignore
                            else:
                                trace_logger.warning(f"Not supported field {field}")
                    tool_tokens += 11
                    if len(parameters["properties"]) == 0:  # pyright: ignore
                        tool_tokens -= 2
            num_tokens += tool_tokens
        num_tokens += 12
        return num_tokens

    def remaining_tokens(self, messages: Sequence[LLMMessage], *, tools: Sequence[Tool | ToolSchema] = []) -> int:
        token_limit = _model_info.get_token_limit(self._create_args["model"])
        return token_limit - self.count_tokens(messages, tools=tools)

    @property
    def capabilities(self) -> ModelCapabilities:  # type: ignore
        warnings.warn("capabilities is deprecated, use model_info instead", DeprecationWarning, stacklevel=2)
        return self._model_info

    @property
    def model_info(self) -> ModelInfo:
        return self._model_info


# TODO: see if response_format can just be a json blob instead of a BaseModel
class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseOllamaClientConfigurationConfigModel]):
    """Chat completion client for Ollama hosted models.

    Ollama must be installed and the appropriate model pulled.

    Args:
        model (str): Which Ollama model to use.
        host (optional, str): Model host URL.
        response_format (optional, pydantic.BaseModel): The format of the response. If provided, the response will be parsed into this format as JSON.
        options (optional, Mapping[str, Any] | Options): Additional options to pass to the Ollama client.
        model_info (optional, ModelInfo): The capabilities of the model. **Required if the model is not listed in the ollama model info.**

    Note:
        Only models with 200k+ downloads (as of Jan 21, 2025), plus phi4 and deepseek-r1, have pre-defined model infos. See `this file <https://github.com/microsoft/autogen/blob/main/python/packages/autogen-ext/src/autogen_ext/models/ollama/_model_info.py>`__ for the full list. An entry for one model encompasses all parameter variants of that model.

    To use this client, you must install the `ollama` extension:

    .. code-block:: bash

        pip install "autogen-ext[ollama]"

    The following code snippet shows how to use the client with an Ollama model:

    .. code-block:: python

        from autogen_ext.models.ollama import OllamaChatCompletionClient
        from autogen_core.models import UserMessage

        ollama_client = OllamaChatCompletionClient(
            model="llama3",
        )

        result = await ollama_client.create([UserMessage(content="What is the capital of France?", source="user")])  # type: ignore
        print(result)

    To load the client from a configuration, you can use the `load_component` method:

    .. code-block:: python

        from autogen_core.models import ChatCompletionClient

        config = {
            "provider": "OllamaChatCompletionClient",
            "config": {"model": "llama3"},
        }

        client = ChatCompletionClient.load_component(config)

    To output structured data, you can use the `response_format` argument:

    .. code-block:: python

        from autogen_ext.models.ollama import OllamaChatCompletionClient
        from autogen_core.models import UserMessage
        from pydantic import BaseModel


        class StructuredOutput(BaseModel):
            first_name: str
            last_name: str


        ollama_client = OllamaChatCompletionClient(
            model="llama3",
            response_format=StructuredOutput,
        )
        result = await ollama_client.create([UserMessage(content="Who was the first man on the moon?", source="user")])  # type: ignore
        print(result)

    Note:
        Tool usage in Ollama is stricter than in its OpenAI counterparts. While OpenAI accepts a map of [str, Any], Ollama requires a map of [str, Property] where Property is a typed object containing ``type`` and ``description`` fields. Therefore, only the keys ``type`` and ``description`` will be converted from the properties blob in the tool schema.
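    For example, the sketch below passes a tool to :py:meth:`create`. It is illustrative only: it assumes a model that supports function calling (``llama3.1`` is used here as an example) and wraps a plain Python function with :py:class:`~autogen_core.tools.FunctionTool`. When the model decides to call the tool, the ``content`` of the returned :py:class:`~autogen_core.models.CreateResult` is a list of :py:class:`~autogen_core.FunctionCall` objects rather than a string.

    .. code-block:: python

        from autogen_core.models import UserMessage
        from autogen_core.tools import FunctionTool
        from autogen_ext.models.ollama import OllamaChatCompletionClient


        async def get_weather(city: str) -> str:
            # Hypothetical helper used only for illustration.
            return f"The weather in {city} is sunny."


        weather_tool = FunctionTool(get_weather, description="Get the weather for a city.")

        ollama_client = OllamaChatCompletionClient(model="llama3.1")

        result = await ollama_client.create(
            [UserMessage(content="What is the weather in Paris?", source="user")],
            tools=[weather_tool],
        )  # type: ignore
        print(result)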
"""component_type="model"component_config_schema=BaseOllamaClientConfigurationConfigModelcomponent_provider_override="autogen_ext.models.ollama.OllamaChatCompletionClient"def__init__(self,**kwargs:Unpack[BaseOllamaClientConfiguration]):if"model"notinkwargs:raiseValueError("model is required for OllamaChatCompletionClient")model_capabilities:Optional[ModelCapabilities]=None# type: ignorecopied_args=dict(kwargs).copy()if"model_capabilities"inkwargs:model_capabilities=kwargs["model_capabilities"]delcopied_args["model_capabilities"]model_info:Optional[ModelInfo]=Noneif"model_info"inkwargs:model_info=kwargs["model_info"]delcopied_args["model_info"]client=_ollama_client_from_config(copied_args)create_args=_create_args_from_config(copied_args)self._raw_config:Dict[str,Any]=copied_argssuper().__init__(client=client,create_args=create_args,model_capabilities=model_capabilities,model_info=model_info)def__getstate__(self)->Dict[str,Any]:state=self.__dict__.copy()state["_client"]=Nonereturnstatedef__setstate__(self,state:Dict[str,Any])->None:self.__dict__.update(state)self._client=_ollama_client_from_config(state["_raw_config"])