Module tinytroupe.utils.validation

Expand source code
import json 
import sys
import unicodedata

from pydantic import ValidationError, BaseModel
from tinytroupe.utils import logger

################################################################################
# Validation
################################################################################
def check_valid_fields(obj: dict, valid_fields: list) -> None:
    """
    Verifies that every key of `obj` appears in `valid_fields`.

    Keys are examined in the dictionary's iteration order and the check stops at the
    first key that is not allowed, raising a ValueError that names that key and lists
    the valid ones. Nothing is returned when all keys are valid.
    """
    # a private sentinel (rather than None) so that a key which is itself None is still reported
    nothing_wrong = object()
    first_invalid = next((name for name in obj if name not in valid_fields), nothing_wrong)

    if first_invalid is not nothing_wrong:
        raise ValueError(f"Invalid key {first_invalid} in dictionary. Valid keys are: {valid_fields}")

def sanitize_raw_string(value: str) -> str:
    """
    Returns a cleaned-up copy of the specified string. The cleaning consists of:
      - dropping anything that cannot be represented in UTF-8 (e.g., unpaired surrogates).
      - normalizing the remaining text to Unicode NFC form.
      - capping the length at the maximum Python string length.

    This is a defensive measure, meant to avoid potential security issues with the string.
    """

    # round-trip through UTF-8; the "ignore" handler silently discards what cannot be encoded
    utf8_bytes = value.encode("utf-8", "ignore")
    decoded = utf8_bytes.decode("utf-8")

    # compose characters into their canonical (NFC) form
    normalized = unicodedata.normalize("NFC", decoded)

    # cap the result at the maximum Python string length
    return normalized[:sys.maxsize]

def sanitize_dict(value: dict) -> dict:
    """
    Sanitizes the specified dictionary, in place, by passing every string value it holds
    through `sanitize_raw_string`. This includes strings stored inside nested dictionaries
    and lists, not only the top-level values.

    Keys and non-string leaf values are left untouched. The nesting depth is not limited:
    the traversal is iterative and scans each container only once, so deeply nested or
    self-referencing structures do not exhaust the call stack.

    Returns the same dictionary object that was passed in.
    """

    pending = [value]  # containers that still have to be scanned
    visited = set()    # ids of containers already scanned (guards against reference cycles)

    while pending:
        container = pending.pop()
        if id(container) in visited:
            continue
        visited.add(id(container))

        # lists are addressed by index and dictionaries by key; only existing slots are
        # reassigned, so updating the container while iterating over it is safe
        entries = enumerate(container) if isinstance(container, list) else container.items()
        for slot, item in entries:
            if isinstance(item, str):
                container[slot] = sanitize_raw_string(item)
            elif isinstance(item, (dict, list)):
                pending.append(item)

    return value

def to_pydantic_or_sanitized_dict(value: dict, model: BaseModel=None) -> dict:
    """
    Turns a model response dictionary into an instance of the given Pydantic model when possible,
    and otherwise falls back to returning the sanitized dictionary.

    When `model` is a subclass of BaseModel, the JSON text under the `content` key of `value` is
    parsed, sanitized and validated against it; on success the resulting model instance is returned.
    If validation fails, a warning is logged and the sanitized `value` is returned instead.
    When `model` is None or is not a BaseModel subclass, `value` is simply sanitized and returned.

    It is assumed that the dict contains the `content` key.
    """

    model_is_usable = model is not None and isinstance(model, type) and issubclass(model, BaseModel)
    if not model_is_usable:
        # No usable model, so just sanitize the dict
        return sanitize_dict(value)

    try:
        # parse the JSON payload, then validate its sanitized form against the model
        payload = json.loads(value['content'])
        return model.model_validate(sanitize_dict(payload))
    except ValidationError as e:
        logger.warning(f"Validation error: {e}")
        return sanitize_dict(value)

Functions

def check_valid_fields(obj: dict, valid_fields: list) ‑> None

Checks whether the fields in the specified dict are valid, according to the list of valid fields. If not, raises a ValueError.

Expand source code
def check_valid_fields(obj: dict, valid_fields: list) -> None:
    """
    Verifies that every key of `obj` appears in `valid_fields`.

    Keys are examined in the dictionary's iteration order and the check stops at the
    first key that is not allowed, raising a ValueError that names that key and lists
    the valid ones. Nothing is returned when all keys are valid.
    """
    # a private sentinel (rather than None) so that a key which is itself None is still reported
    nothing_wrong = object()
    first_invalid = next((name for name in obj if name not in valid_fields), nothing_wrong)

    if first_invalid is not nothing_wrong:
        raise ValueError(f"Invalid key {first_invalid} in dictionary. Valid keys are: {valid_fields}")
def sanitize_dict(value: dict) ‑> dict

Sanitizes the specified dictionary by removing any invalid characters and by ensuring that the dictionary is not too deeply nested.

Expand source code
def sanitize_dict(value: dict) -> dict:
    """
    Sanitizes the specified dictionary, in place, by passing every string value it holds
    through `sanitize_raw_string`. This includes strings stored inside nested dictionaries
    and lists, not only the top-level values.

    Keys and non-string leaf values are left untouched. The nesting depth is not limited:
    the traversal is iterative and scans each container only once, so deeply nested or
    self-referencing structures do not exhaust the call stack.

    Returns the same dictionary object that was passed in.
    """

    pending = [value]  # containers that still have to be scanned
    visited = set()    # ids of containers already scanned (guards against reference cycles)

    while pending:
        container = pending.pop()
        if id(container) in visited:
            continue
        visited.add(id(container))

        # lists are addressed by index and dictionaries by key; only existing slots are
        # reassigned, so updating the container while iterating over it is safe
        entries = enumerate(container) if isinstance(container, list) else container.items()
        for slot, item in entries:
            if isinstance(item, str):
                container[slot] = sanitize_raw_string(item)
            elif isinstance(item, (dict, list)):
                pending.append(item)

    return value
def sanitize_raw_string(value: str) ‑> str

Sanitizes the specified string by removing any invalid characters and by ensuring it is not longer than the maximum Python string length.

This is for an abundance of caution with security, to avoid any potential issues with the string.

Expand source code
def sanitize_raw_string(value: str) -> str:
    """
    Returns a cleaned-up copy of the specified string. The cleaning consists of:
      - dropping anything that cannot be represented in UTF-8 (e.g., unpaired surrogates).
      - normalizing the remaining text to Unicode NFC form.
      - capping the length at the maximum Python string length.

    This is a defensive measure, meant to avoid potential security issues with the string.
    """

    # round-trip through UTF-8; the "ignore" handler silently discards what cannot be encoded
    utf8_bytes = value.encode("utf-8", "ignore")
    decoded = utf8_bytes.decode("utf-8")

    # compose characters into their canonical (NFC) form
    normalized = unicodedata.normalize("NFC", decoded)

    # cap the result at the maximum Python string length
    return normalized[:sys.maxsize]
def to_pydantic_or_sanitized_dict(value: dict, model: pydantic.main.BaseModel = None) ‑> dict

Converts the specified model response dictionary to a Pydantic model instance, or sanitizes it if the model is not valid. It is assumed that the dict contains the `content` key.

Expand source code
def to_pydantic_or_sanitized_dict(value: dict, model: BaseModel=None) -> dict:
    """
    Turns a model response dictionary into an instance of the given Pydantic model when possible,
    and otherwise falls back to returning the sanitized dictionary.

    When `model` is a subclass of BaseModel, the JSON text under the `content` key of `value` is
    parsed, sanitized and validated against it; on success the resulting model instance is returned.
    If validation fails, a warning is logged and the sanitized `value` is returned instead.
    When `model` is None or is not a BaseModel subclass, `value` is simply sanitized and returned.

    It is assumed that the dict contains the `content` key.
    """

    model_is_usable = model is not None and isinstance(model, type) and issubclass(model, BaseModel)
    if not model_is_usable:
        # No usable model, so just sanitize the dict
        return sanitize_dict(value)

    try:
        # parse the JSON payload, then validate its sanitized form against the model
        payload = json.loads(value['content'])
        return model.model_validate(sanitize_dict(payload))
    except ValidationError as e:
        logger.warning(f"Validation error: {e}")
        return sanitize_dict(value)