Source code for scistanpy.model.components.transformations.cdfs

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.

"""
Cumulative distribution function transformations for SciStanPy parameters.

This module provides specialized transformation classes for computing cumulative
distribution functions (CDFs) and related probability functions for SciStanPy
model parameters.

The module implements a unified interface for CDF-like computations across
multiple computational backends (NumPy/SciPy, PyTorch) while automatically
generating appropriate Stan code for each transformation.

CDF-like Transformation Types:
    - :py:class:`~scistanpy.model.components.transformations.cdfs.CDF`
    - :py:class:`~scistanpy.model.components.transformations.cdfs.SurvivalFunction`
    - :py:class:`~scistanpy.model.components.transformations.cdfs.LogCDF`
    - :py:class:`~scistanpy.model.components.transformations.cdfs.LogSurvivalFunction`

Each CDF class automatically handles backend-specific implementations:
    - **NumPy/SciPy**: Uses SciPy distribution methods with parameter transforms as needed
    - **PyTorch**: Uses PyTorch distribution objects with appropriate methods
    - **Stan**: Generates function calls with proper parameter ordering

The classes are not intended to be accessed directly. Instead, they are used as
templates by the :py:class:`~scistanpy.model.components.parameters.ParameterMeta`
metaclass to build
:py:class:`~scistanpy.model.components.parameters.Parameter`-specific classes
on module import, which are assigned to the ``CDF``, ``LOG_CDF``, ``SF``, and
``LOG_SF`` properties of each
:py:class:`~scistanpy.model.components.parameters.Parameter`.
"""

from __future__ import annotations

from abc import abstractmethod
from typing import TYPE_CHECKING

import numpy as np
import torch

from scistanpy import utils
from scistanpy.model.components.transformations import transformed_parameters

if TYPE_CHECKING:
    from scistanpy import custom_types
    from scistanpy.model.components import parameters



[docs]
class CDFLike(transformed_parameters.TransformedParameter):
    """Base class for cumulative distribution function transformations.

    This abstract base class provides the common infrastructure for all CDF-like
    transformations including parameter validation, backend selection, and
    Stan code generation. It cannot be instantiated directly but serves as the
    foundation for specific CDF transformation types.

    :param x: Input values for CDF evaluation
    :type x: custom_types.CombinableParameterType
    :param shape: Shape of the transformation output. Defaults to ().
    :type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
    :param params: Distribution parameters required for the CDF computation
    :type params: custom_types.CombinableParameterType

    :raises TypeError: If unexpected or missing parameters are provided

    The class provides a unified interface for computing probability functions
    across different computational backends while maintaining compatibility
    with the SciStanPy model component system.

    Key Responsibilities:
        - Parameter validation against expected parameter sets
        - Backend detection and appropriate method dispatch
        - Parameter transformation for SciPy compatibility
        - Stan code generation for probability function calls

    The class automatically handles the complexities of:
        - Converting between parameter naming conventions
        - Applying parameter transformations for different backends
        - Generating appropriate function calls for each backend
    """

    # Class variables for each CDF
    PARAMETER: "parameters.Parameter"
    """
    Reference to the :py:class:`~scistanpy.model.components.parameters.Parameter`
    subclass for which this ``CDFLike`` class applies. Should be set by the metaclass.
    """

    SCIPY_FUNC: str  # cdf, sf, logcdf, logsf
    """
    Name of the SciPy method for this operation (e.g., 'cdf', 'sf', 'logcdf',
    'logsf'). Should be set by subclasses.
    """

    TORCH_FUNC: str  # cdf, log_cdf, log_sf
    """
    Name of the PyTorch distribution method tried first for this operation
    (e.g., 'cdf', 'log_cdf', 'log_sf'). When the distribution object does not
    expose it, ``run_np_torch_op`` falls back to 'cdf'. Should be set by subclasses.
    """

    STAN_SUFFIX: str  # The suffix for the Stan operation, e.g., "cdf"
    """
    Suffix for Stan function name generation (e.g., "cdf"). Should be set by subclasses.
    """

    def __init__(
        self,
        x: "custom_types.CombinableParameterType",
        shape: tuple[custom_types.Integer, ...] | custom_types.Integer = (),
        **params: "custom_types.CombinableParameterType",
    ):
        """Initialize CDF transformation with parameter validation.

        :param x: Input values for CDF evaluation
        :type x: custom_types.CombinableParameterType
        :param shape: Shape of the transformation output
        :type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
        :param params: Distribution parameters for the CDF computation
        :type params: custom_types.CombinableParameterType

        :raises TypeError: If parameters don't match those required by the target distribution

        The initialization process validates that all required distribution
        parameters are provided and no unexpected parameters are included.
        """
        # Validate before touching the parent so a bad call fails fast with a
        # message that names this class.
        self.check_parameters(set(params.keys()))

        # Forward `shape` explicitly: it is part of this signature and would
        # otherwise be dropped, leaving the parent on its own default.
        super().__init__(x=x, shape=shape, **params)

    def check_parameters(self, kwargset: set[str]) -> None:
        """Validate that provided parameters match distribution requirements.

        :param kwargset: Set of parameter names provided
        :type kwargset: set[str]

        :raises TypeError: If unexpected parameters are provided
        :raises TypeError: If required parameters are missing

        The expected names are the keys of ``PARAMETER.STAN_TO_SCIPY_NAMES``.
        Unexpected names are reported before missing ones.
        """
        expected = self.__class__.PARAMETER.STAN_TO_SCIPY_NAMES.keys()

        # Make sure that these are the only parameters passed
        if additional_params := kwargset - expected:
            raise TypeError(
                f"Unexpected parameters {additional_params} passed to "
                f"{self.__class__.__name__}."
            )

        # ...and that none of the required ones were left out
        if missing_params := expected - kwargset:
            raise TypeError(
                f"Missing parameters {missing_params} for {self.__class__.__name__}."
            )

    @abstractmethod
    def run_np_torch_op(self, **draws):
        """Execute the CDF-like operation using NumPy or PyTorch backend as appropriate.

        :param draws: Dictionary of parameter draws for the operation. Must
            contain an ``"x"`` entry holding the evaluation points; every other
            entry is treated as a distribution parameter keyed by its Stan name.
        :type draws: dict

        :returns: CDFLike evaluation results
        :rtype: Union[np.ndarray, torch.Tensor]

        :raises TypeError: If unsupported module type is detected

        Although abstract, this method carries the shared implementation that
        subclasses call through ``super()``.

        Backend Handling:
            - **NumPy**: Calls ``SCIPY_FUNC`` on ``PARAMETER.SCIPY_DIST`` after
              renaming (and, where registered, transforming) the parameters.
            - **PyTorch**: Builds ``PARAMETER.TORCH_DIST`` and calls
              ``TORCH_FUNC`` on it if the distribution has that attribute,
              otherwise calls ``cdf``. Subclasses are responsible for
              post-processing the plain CDF in the fallback case.
            - **Other**: Raises TypeError for unsupported backends
        """
        # The backend is chosen from the first draw provided
        module = utils.choose_module(next(iter(draws.values())))

        # Separate the evaluation point from the distribution parameters without
        # mutating the caller's dictionary.
        draws_copy = draws.copy()
        x = draws_copy.pop("x")

        param_cls = self.__class__.PARAMETER

        # NumPy draws are evaluated with the SciPy distribution
        if module is np:
            transforms = param_cls.STAN_TO_SCIPY_TRANSFORMS
            scipy_kwargs = {}
            for name, draw in draws_copy.items():
                # Parameters without a registered transform pass through as-is
                transform = transforms.get(name, lambda value: value)
                scipy_kwargs[param_cls.STAN_TO_SCIPY_NAMES[name]] = transform(draw)

            return getattr(param_cls.SCIPY_DIST, self.SCIPY_FUNC)(x, **scipy_kwargs)

        # Torch separates distribution creation from function evaluation
        if module is torch:
            dist = param_cls.TORCH_DIST(
                **{
                    param_cls.STAN_TO_TORCH_NAMES[name]: draw
                    for name, draw in draws_copy.items()
                }
            )

            # Some torch dists have custom methods that compute the target
            # value directly. Others only offer the CDF, which child classes
            # then convert to the quantity they need.
            torch_func = self.__class__.TORCH_FUNC
            method_name = torch_func if hasattr(dist, torch_func) else "cdf"
            return getattr(dist, method_name)(x)

        raise TypeError(
            f"Unsupported module {module} for CDF operation. "
            "Expected numpy or torch."
        )

    def write_stan_operation(self, **kwargs) -> str:
        """Generate Stan code for the ``CDFLike`` operation.

        :param kwargs: Formatted parameter strings for Stan code generation.
            Must contain ``"x"`` plus one entry per key of
            ``PARAMETER.STAN_TO_SCIPY_NAMES``.
        :type kwargs: dict[str, str]

        :returns: Stan function call string
        :rtype: str

        The generated Stan code follows the pattern:
        distribution_suffix(x | param1, param2, ...)

        The function name is ``PARAMETER.STAN_DIST`` joined to ``STAN_SUFFIX``
        with an underscore, and the parameters appear in the iteration order of
        ``PARAMETER.STAN_TO_SCIPY_NAMES``.
        """
        param_cls = self.__class__.PARAMETER

        # Get the function and arguments for the operation
        func = f"{param_cls.STAN_DIST}_{self.STAN_SUFFIX}"
        args = ", ".join(kwargs[name] for name in param_cls.STAN_TO_SCIPY_NAMES)

        return f"{func}({kwargs['x']} | {args})"





[docs]
class CDF(CDFLike):
    r"""Standard cumulative distribution function transformation.

    Computes :math:`P(X \leq x)` for a given distribution and evaluation point.

    :param x: Values at which to evaluate the CDF
    :type x: custom_types.CombinableParameterType
    :param shape: Shape of the output. Defaults to ().
    :type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
    :param params: Distribution parameters
    :type params: custom_types.CombinableParameterType

    Mathematical Definition:
        .. math::
            F(x) = P(X \leq x) = \int_{-\infty}^{x} f(t) dt

    Where :math:`f(t)` is the probability density function of the distribution.

    Common Applications:
        - Computing tail probabilities
        - Implementing truncated distributions
        - Calculating quantiles and percentiles
        - Model validation through probability plots

    Example:
        >>> # Via parameter instance (typical usage)
        >>> normal_param = Normal(mu=0.0, sigma=1.0)
        >>> cdf_transform = normal_param.cdf(x=data_points)
        >>>
        >>> # Direct instantiation (less common)
        >>> cdf_transform = Normal.CDF(x=values, mu=0.0, sigma=1.0)
    """

    SCIPY_FUNC = "cdf"  # The SciPy function for the CDF
    STAN_SUFFIX = "cdf"  # The suffix for the Stan operation
    TORCH_FUNC = "cdf"  # The torch distribution method for the CDF

    def run_np_torch_op(self, **draws):  # pylint: disable=useless-parent-delegation
        """Execute CDF computation using appropriate backend.

        :param draws: Parameter draws for the computation
        :type draws: dict

        :returns: CDF values P(X ≤ x)
        :rtype: Union[np.ndarray, torch.Tensor]

        The parent method is declared abstract, so this override exists to make
        the class concrete. No post-processing is needed because both backends
        are asked for the ``cdf`` method directly.
        """
        # Run using the function returned by the parent method.
        return super().run_np_torch_op(**draws)





[docs]
class SurvivalFunction(CDFLike):
    r"""Survival function (complementary CDF) transformation.

    Computes :math:`P(X \gt x) = 1 - P(X \leq x)` for a given distribution and
    evaluation point.

    :param x: Values at which to evaluate the survival function
    :type x: custom_types.CombinableParameterType
    :param shape: Shape of the output. Defaults to ().
    :type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
    :param params: Distribution parameters
    :type params: custom_types.CombinableParameterType

    Mathematical Definition:
        .. math::
            S(x) = P(X \gt x) = 1 - F(x) = \int_{x}^{\infty} f(t) dt

    Where :math:`F(x)` is the CDF and :math:`f(t)` is the probability density function.

    Common Applications:
        - Survival analysis and time-to-event modeling
        - Reliability engineering and failure analysis
        - Risk assessment and hazard modeling
        - Complementary probability calculations

    The implementation automatically handles backend differences:
        - NumPy: Uses SciPy's direct survival function methods
        - PyTorch: Computes 1 - CDF

    Example:
        >>> # Survival analysis
        >>> survival_times = Exponential(rate=0.1)
        >>> survival_prob = survival_times.ccdf(x=time_points)
    """

    SCIPY_FUNC = "sf"  # The SciPy function for the survival function
    # NOTE(review): Stan's built-in distributions appear to provide `_cdf`,
    # `_lcdf` and `_lccdf` but no plain `_ccdf`; confirm that a matching
    # `<dist>_ccdf` function is defined wherever the generated code is compiled.
    STAN_SUFFIX = "ccdf"  # The suffix for the Stan operation
    TORCH_FUNC = "cdf"  # Torch is asked for the CDF; see run_np_torch_op

    def run_np_torch_op(self, **draws):
        r"""Execute survival function computation with backend-specific handling.

        :param draws: Parameter draws for the computation
        :type draws: dict

        :returns: Survival function values :math:`P(X \gt x)`
        :rtype: Union[np.ndarray, np.generic, torch.Tensor]

        :raises TypeError: If unsupported output type is encountered

        This method handles the difference between NumPy and PyTorch:
            - NumPy: SciPy provides direct survival function methods
            - PyTorch: Computes 1 - CDF
        """
        # Get the output of the parent method
        output = super().run_np_torch_op(**draws)

        # SciPy already evaluated ``sf``. NumPy scalars are accepted alongside
        # arrays because SciPy returns a scalar when every input is 0-d.
        if isinstance(output, (np.ndarray, np.generic)):
            return output

        # Torch evaluated the CDF, so take the complement
        if isinstance(output, torch.Tensor):
            return 1 - output

        raise TypeError(
            f"Unsupported module {type(output)} for survival function operation. "
            "Expected numpy or torch."
        )





[docs]
class LogCDF(CDFLike):
    r"""Logarithmic cumulative distribution function transformation.

    Computes :math:`\log(P(X \leq x)) = \log(F(x))` for numerical stability when
    dealing with very small probabilities. This is essential for
    computations involving extreme tail probabilities.

    :param x: Values at which to evaluate the log CDF
    :type x: custom_types.CombinableParameterType
    :param shape: Shape of the output. Defaults to ().
    :type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
    :param params: Distribution parameters
    :type params: custom_types.CombinableParameterType

    Mathematical Definition:
        .. math::
            \log F(x) = \log(P(X \leq x))

    Numerical Advantages:
        - Prevents underflow for very small probabilities
        - Enables stable computation in log-space
        - Essential for extreme value analysis

    Common Applications:
        - Extreme value analysis and rare event modeling
        - Numerical optimization in log-space
        - MCMC sampling with extreme parameter values
        - Likelihood computations for tail events

    The implementation handles backend-specific log CDF methods:
        - NumPy: Uses SciPy's logcdf methods
        - PyTorch: Uses log_cdf methods or log(cdf) fallback

    Example:
        >>> # Extreme tail probability
        >>> normal_param = Normal(mu=0, sigma=1)
        >>> log_tail_prob = normal_param.log_cdf(x=extreme_values)
    """

    SCIPY_FUNC = "logcdf"  # The SciPy function for the log CDF
    STAN_SUFFIX = "lcdf"  # The suffix for the Stan operation
    TORCH_FUNC = "log_cdf"  # Preferred torch method; falls back to log(cdf)

    def run_np_torch_op(self, **draws):
        r"""Execute log CDF computation with appropriate numerical handling.

        :param draws: Parameter draws for the computation
        :type draws: dict

        :returns: Log CDF values :math:`\log(P(X \leq x))`
        :rtype: Union[np.ndarray, np.generic, torch.Tensor]

        :raises TypeError: If unsupported output type is encountered

        For PyTorch, the parent returns the result of ``log_cdf`` when the
        distribution provides it and the plain CDF otherwise; in the latter
        case the logarithm is applied here.
        """
        output = super().run_np_torch_op(**draws)

        # SciPy already evaluated ``logcdf``. NumPy scalars are accepted
        # alongside arrays because SciPy returns a scalar when every input is 0-d.
        if isinstance(output, (np.ndarray, np.generic)):
            return output

        if isinstance(output, torch.Tensor):
            # A native ``log_cdf`` means the parent output is already in log-space
            if hasattr(self.__class__.PARAMETER.TORCH_DIST, self.__class__.TORCH_FUNC):
                return output
            # Otherwise the parent fell back to ``cdf``
            return torch.log(output)

        # If using an unsupported type, raise an error
        raise TypeError(
            f"Unsupported module {type(output)} for log CDF operation. "
            "Expected numpy or torch."
        )





[docs]
class LogSurvivalFunction(CDFLike):
    r"""Logarithmic survival function transformation.

    Computes :math:`\log(P(X > x)) = \log(1 - F(x))` for numerical stability
    when dealing with survival probabilities that may be very close
    to zero or one. Essential for stable survival analysis computations.

    :param x: Values at which to evaluate the log survival function
    :type x: custom_types.CombinableParameterType
    :param shape: Shape of the output. Defaults to ().
    :type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
    :param params: Distribution parameters
    :type params: custom_types.CombinableParameterType

    Mathematical Definition:
        .. math::
            \log S(x) = \log(P(X > x)) = \log(1 - F(x))

        Where :math:`F(x)` is the CDF.

    Numerical Advantages:
        - Prevents underflow for probabilities near 0 or 1
        - Maintains precision for extreme survival times
        - Enables stable log-space arithmetic
        - Critical for numerical stability in survival models

    Common Applications:
        - Survival analysis with extreme event times
        - Reliability engineering with high reliability systems
        - Hazard modeling with rare failure events
        - Log-likelihood computations for survival models

    The implementation provides numerically stable computation:
        - NumPy: Uses SciPy's logsf methods for direct computation
        - PyTorch: Uses log_sf methods or :math:`\text{log1p}(-cdf)` as fallback
    """

    SCIPY_FUNC = "logsf"  # The SciPy function for the log survival function
    STAN_SUFFIX = "lccdf"  # The suffix for the Stan operation
    TORCH_FUNC = "log_sf"  # Preferred torch method; falls back to log1p(-cdf)

    def run_np_torch_op(self, **draws):
        r"""Execute log survival function computation with numerical stability.

        :param draws: Parameter draws for the computation
        :type draws: dict

        :returns: Log survival function values :math:`\log(P(X \gt x))`
        :rtype: Union[np.ndarray, np.generic, torch.Tensor]

        :raises TypeError: If unsupported output type is encountered

        For PyTorch, the parent returns the result of ``log_sf`` when the
        distribution provides it and the plain CDF otherwise; in the latter
        case :math:`\text{log1p}(-cdf)` is applied here.
        """
        # Get the output of the parent method
        output = super().run_np_torch_op(**draws)

        # SciPy already evaluated ``logsf``. NumPy scalars are accepted
        # alongside arrays because SciPy returns a scalar when every input is 0-d.
        if isinstance(output, (np.ndarray, np.generic)):
            return output

        if isinstance(output, torch.Tensor):
            # A native ``log_sf`` means the parent output is already the answer
            if hasattr(self.__class__.PARAMETER.TORCH_DIST, self.__class__.TORCH_FUNC):
                return output
            # Otherwise the parent fell back to ``cdf``: take log(1 - cdf)
            return torch.log1p(-output)

        raise TypeError(
            f"Unsupported module {type(output)} for log survival function operation. "
            "Expected numpy or torch."
        )
Source code for scistanpy.model.components.transformations.cdfs

SciStanPy

Navigation

Related Topics