# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
"""
Cumulative distribution function transformations for SciStanPy parameters.
This module provides specialized transformation classes for computing cumulative
distribution functions (CDFs) and related probability functions for SciStanPy
model parameters.
The module implements a unified interface for CDF-like computations across
multiple computational backends (NumPy/SciPy, PyTorch) while automatically
generating appropriate Stan code for each transformation.
CDF-like Transformation Types:
- :py:class:`~scistanpy.model.components.transformations.cdfs.CDF`
- :py:class:`~scistanpy.model.components.transformations.cdfs.SurvivalFunction`
- :py:class:`~scistanpy.model.components.transformations.cdfs.LogCDF`
- :py:class:`~scistanpy.model.components.transformations.cdfs.LogSurvivalFunction`
Each CDF class automatically handles backend-specific implementations:
- **NumPy/SciPy**: Uses SciPy distribution methods with parameter transforms as needed
- **PyTorch**: Uses PyTorch distribution objects with appropriate methods
- **Stan**: Generates function calls with proper parameter ordering
The classes are not intended to be accessed directly. Instead, they are used as
templates by the :py:class:`~scistanpy.model.components.parameters.ParameterMeta`
metaclass to build :py:class:`~scistanpy.model.components.parameters.Parameter`-specific
classes on module import, which are assigned to the ``CDF``, ``LOG_CDF``, ``SF``, and
``LOG_SF`` properties of each :py:class:`~scistanpy.model.components.parameters.Parameter`.
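Example (illustrative; mirrors the class-level examples below and assumes a
``Normal`` parameter taking ``mu`` and ``sigma``):
>>> normal_param = Normal(mu=0.0, sigma=1.0)
>>> cdf_node = normal_param.cdf(x=data_points)
>>> # Direct use of the class attached to the Parameter subclass (less common)
>>> log_sf_node = Normal.LOG_SF(x=data_points, mu=0.0, sigma=1.0)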
"""
from __future__ import annotations
from abc import abstractmethod
from typing import TYPE_CHECKING
import numpy as np
import torch
from scistanpy import utils
from scistanpy.model.components.transformations import transformed_parameters
if TYPE_CHECKING:
from scistanpy import custom_types
from scistanpy.model.components import parameters
class CDFLike(transformed_parameters.TransformedParameter):
"""Base class for cumulative distribution function transformations.
This abstract base class provides the common infrastructure for all CDF-like
transformations including parameter validation, backend selection, and
Stan code generation. It cannot be instantiated directly but serves as the
foundation for specific CDF transformation types.
:param x: Input values for CDF evaluation
:type x: custom_types.CombinableParameterType
:param shape: Shape of the transformation output. Defaults to ().
:type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
:param params: Distribution parameters required for the CDF computation
:type params: custom_types.CombinableParameterType
:raises TypeError: If unexpected or missing parameters are provided
The class provides a unified interface for computing probability functions
across different computational backends while maintaining compatibility
with the SciStanPy model component system.
Key Responsibilities:
- Parameter validation against expected parameter sets
- Backend detection and appropriate method dispatch
- Parameter transformation for SciPy compatibility
- Stan code generation for probability function calls
The class automatically handles the complexities of:
- Converting between parameter naming conventions
- Applying parameter transformations for different backends
- Generating appropriate function calls for each backend
"""
# Class variables for each CDF
PARAMETER: "parameters.Parameter"
"""
Reference to the :py:class:`~scistanpy.model.components.parameters.Parameter`
subclass for which this ``CDFLike`` class applies. Should be set by the metaclass.
"""
SCIPY_FUNC: str # cdf, sf, logcdf, logsf
"""
Name of the SciPy method for this operation (e.g., 'cdf', 'sf', 'logcdf',
'logsf'). Should be set by subclasses.
"""
TORCH_FUNC: str # cdf, log_cdf, log_sf
"""
Name of the PyTorch method for this operation (e.g., 'cdf', 'log_cdf', 'log_sf').
Should be set by subclasses.
"""
STAN_SUFFIX: str # The suffix for the Stan operation, e.g., "cdf"
"""
Suffix for Stan function name generation (e.g., "cdf"). Should be set by subclasses.
"""
def __init__(
self,
x: "custom_types.CombinableParameterType",
shape: tuple[custom_types.Integer, ...] | custom_types.Integer = (),
**params: "custom_types.CombinableParameterType",
):
"""Initialize CDF transformation with parameter validation.
:param x: Input values for CDF evaluation
:type x: custom_types.CombinableParameterType
:param shape: Shape of the transformation output
:type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
:param params: Distribution parameters for the CDF computation
:type params: custom_types.CombinableParameterType
:raises TypeError: If parameters don't match those required by the target distribution
The initialization process validates that all required distribution
parameters are provided and no unexpected parameters are included.
"""
# Check if the parameters passed are the ones required for the CDF
self.check_parameters(set(params.keys()))
super().__init__(x=x, **params)
def check_parameters(self, kwargset: set[str]) -> None:
"""Validate that provided parameters match distribution requirements.
:param kwargset: Set of parameter names provided
:type kwargset: set[str]
:raises TypeError: If unexpected parameters are provided
:raises TypeError: If required parameters are missing
This method ensures that the CDF transformation receives exactly the
parameters required by the underlying probability distribution, with
no additional or missing parameters.
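Illustrative failure (hypothetical; assumes a ``Normal``-derived CDF class
expecting ``mu`` and ``sigma``):
>>> Normal.CDF(x=0.0, mu=0.0)  # missing 'sigma' -> TypeError
>>> Normal.CDF(x=0.0, mu=0.0, sigma=1.0, tau=2.0)  # unexpected 'tau' -> TypeError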
"""
# Make sure that these are the only parameters passed
if (
additional_params := kwargset
- self.__class__.PARAMETER.STAN_TO_SCIPY_NAMES.keys()
):
raise TypeError(
f"Unexpected parameters {additional_params} passed to "
f"{self.__class__.__name__}."
)
if (
missing_params := self.__class__.PARAMETER.STAN_TO_SCIPY_NAMES.keys()
- kwargset
):
raise TypeError(
f"Missing parameters {missing_params} for {self.__class__.__name__}."
)
@abstractmethod
def run_np_torch_op(self, **draws):
"""Execute the CDF-like operation using NumPy or PyTorch backend as appropriate.
:param draws: Dictionary of parameter draws for the operation
:type draws: dict
:returns: CDFLike evaluation results
:rtype: Union[np.ndarray, torch.Tensor]
:raises TypeError: If unsupported module type is detected
Although declared abstract (subclasses must override it), this base
implementation provides the core computational logic for evaluating the
CDFLike transformation; subclasses invoke it via ``super()`` and
post-process the result as needed. It automatically detects the
computational backend and applies appropriate parameter transformations.
Backend Handling:
- **NumPy**: Uses SciPy distribution methods with parameter transforms
- **PyTorch**: Creates distribution objects and calls appropriate methods
- **Other**: Raises TypeError for unsupported backends
The method separates the evaluation point (``x``) from distribution
parameters and handles backend-specific parameter naming and
transformation requirements.
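Sketch of the dispatch (hypothetical draws; assumes ``STAN_TO_SCIPY_NAMES``
and ``STAN_TO_TORCH_NAMES`` map ``mu``/``sigma`` to ``loc``/``scale``):
>>> # draws = {"x": 0.5, "mu": 0.0, "sigma": 1.0}
>>> # NumPy backend: getattr(PARAMETER.SCIPY_DIST, SCIPY_FUNC)(0.5, loc=0.0, scale=1.0)
>>> # PyTorch backend: PARAMETER.TORCH_DIST(loc=0.0, scale=1.0), then call TORCH_FUNC
>>> # (or ``cdf`` if the distribution lacks that method) on x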
"""
# Get the module for the CDF function
module = utils.choose_module(next(iter(draws.values())))
# We need to separate the x value from the draws
draws_copy = draws.copy()
x = draws_copy.pop("x")
# If numpy, use the SciPy dist. If torch, build the torch dist and call the
# appropriate method, falling back to the CDF when no dedicated method
# exists; child classes post-process the result as needed.
if module is np:
kwargs = {
self.__class__.PARAMETER.STAN_TO_SCIPY_NAMES[
name
]: self.__class__.PARAMETER.STAN_TO_SCIPY_TRANSFORMS.get(
name, lambda x: x
)(
draw
)
for name, draw in draws_copy.items()
}
return getattr(self.__class__.PARAMETER.SCIPY_DIST, self.SCIPY_FUNC)(
x, **kwargs
)
# Torch separates distribution creation and function operation, so we need
# to split out the 'x' value from the draws.
elif module is torch:
# Build the distribution
dist = self.__class__.PARAMETER.TORCH_DIST(
**{
self.__class__.PARAMETER.STAN_TO_TORCH_NAMES[name]: draw
for name, draw in draws_copy.items()
}
)
# Run the appropriate method. Some torch dists provide a method that
# computes the target value directly; for the rest, fall back to the CDF.
return getattr(
dist,
(
self.__class__.TORCH_FUNC
if hasattr(dist, self.__class__.TORCH_FUNC)
else "cdf"
),
)(x)
else:
raise TypeError(
f"Unsupported module {module} for CDF operation. "
"Expected numpy or torch."
)
def write_stan_operation(self, **kwargs) -> str:
"""Generate Stan code for the ``CDFLike`` operation.
:param kwargs: Formatted parameter strings for Stan code generation
:type kwargs: dict[str, str]
:returns: Stan function call string
:rtype: str
This method constructs the appropriate Stan function call for the
CDF operation, using the distribution name, operation suffix, and
properly ordered parameters.
The generated Stan code follows the pattern
``{STAN_DIST}_{STAN_SUFFIX}(x | param1, param2, ...)``,
where the parameters are ordered according to the distribution's
Stan parameter ordering conventions.
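Illustrative result (assumes a Normal-based CDF with ``STAN_DIST == "normal"``
and ``STAN_SUFFIX == "cdf"``):
>>> # write_stan_operation(x="y", mu="mu", sigma="sigma")
>>> # -> 'normal_cdf(y | mu, sigma)'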
"""
# Get the function and arguments for the operation
func = f"{self.__class__.PARAMETER.STAN_DIST}_{self.STAN_SUFFIX}"
args = ", ".join(
kwargs[name] for name in self.__class__.PARAMETER.STAN_TO_SCIPY_NAMES
)
return f"{func}({kwargs['x']} | {args})"
class CDF(CDFLike):
r"""Standard cumulative distribution function transformation.
Computes :math:`P(X \leq x)` for a given distribution and evaluation point.
:param x: Values at which to evaluate the CDF
:type x: custom_types.CombinableParameterType
:param shape: Shape of the output. Defaults to ().
:type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
:param params: Distribution parameters
:type params: custom_types.CombinableParameterType
Mathematical Definition:
.. math::
F(x) = P(X \leq x) = \int_{-\infty}^{x} f(t) dt
Where :math:`f(t)` is the probability density function of the distribution.
Common Applications:
- Computing tail probabilities
- Implementing truncated distributions
- Calculating quantiles and percentiles
- Model validation through probability plots
Example:
>>> # Via parameter instance (typical usage)
>>> normal_param = Normal(mu=0.0, sigma=1.0)
>>> cdf_transform = normal_param.cdf(x=data_points)
>>>
>>> # Direct instantiation (less common)
>>> cdf_transform = Normal.CDF(x=values, mu=0.0, sigma=1.0)
"""
SCIPY_FUNC = "cdf" # The SciPy function for the CDF
STAN_SUFFIX = "cdf" # The suffix for the Stan operation
TORCH_FUNC = "cdf"
def run_np_torch_op(self, **draws): # pylint: disable=useless-parent-delegation
"""Execute CDF computation using appropriate backend.
:param draws: Parameter draws for the computation
:type draws: dict
:returns: CDF values P(X ≤ x)
:rtype: Union[np.ndarray, torch.Tensor]
This implementation uses the base class method directly as both
NumPy and PyTorch provide direct CDF computation methods.
"""
# Run using the function returned by the parent method.
return super().run_np_torch_op(**draws)
class SurvivalFunction(CDFLike):
r"""Survival function (complementary CDF) transformation.
Computes :math:`P(X > x) = 1 - P(X \leq x)` for a given distribution and
evaluation point.
:param x: Values at which to evaluate the survival function
:type x: custom_types.CombinableParameterType
:param shape: Shape of the output. Defaults to ().
:type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
:param params: Distribution parameters
:type params: custom_types.CombinableParameterType
Mathematical Definition:
.. math::
S(x) = P(X > x) = 1 - F(x) = \int_{x}^{\infty} f(t) dt
Where :math:`F(x)` is the CDF and :math:`f(t)` is the probability density function.
Common Applications:
- Survival analysis and time-to-event modeling
- Reliability engineering and failure analysis
- Risk assessment and hazard modeling
- Complementary probability calculations
The implementation automatically handles backend differences:
- NumPy: Uses SciPy's direct survival function methods
- PyTorch: Computes 1 - CDF
Example:
>>> # Survival analysis
>>> survival_times = Exponential(rate=0.1)
>>> survival_prob = survival_times.ccdf(x=time_points)
"""
SCIPY_FUNC = "sf" # The SciPy function for the survival function
STAN_SUFFIX = "ccdf" # The suffix for the Stan operation
TORCH_FUNC = "cdf"
def run_np_torch_op(self, **draws):
r"""Execute survival function computation with backend-specific handling.
:param draws: Parameter draws for the computation
:type draws: dict
:returns: Survival function values :math:`P(X > x)`
:rtype: Union[np.ndarray, torch.Tensor]
:raises TypeError: If unsupported output type is encountered
This method handles the difference between NumPy and PyTorch:
- NumPy: SciPy provides direct survival function methods
- PyTorch: Computes 1 - CDF
"""
# Get the output of the parent method
output = super().run_np_torch_op(**draws)
# If using numpy, just return
if isinstance(output, np.ndarray):
return output
# If using torch, subtract from 1 to get the survival function
elif isinstance(output, torch.Tensor):
return 1 - output
else:
raise TypeError(
f"Unsupported module {type(output)} for survival function operation. "
"Expected numpy or torch."
)
class LogCDF(CDFLike):
r"""Logarithmic cumulative distribution function transformation.
Computes :math:`\log(P(X \leq x)) = \log(F(x))` for numerical stability when
dealing with very small probabilities. This is essential for
computations involving extreme tail probabilities.
:param x: Values at which to evaluate the log CDF
:type x: custom_types.CombinableParameterType
:param shape: Shape of the output. Defaults to ().
:type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
:param params: Distribution parameters
:type params: custom_types.CombinableParameterType
Mathematical Definition:
.. math::
\log F(x) = \log(P(X \leq x))
Numerical Advantages:
- Prevents underflow for very small probabilities
- Enables stable computation in log-space
- Essential for extreme value analysis
Common Applications:
- Extreme value analysis and rare event modeling
- Numerical optimization in log-space
- MCMC sampling with extreme parameter values
- Likelihood computations for tail events
The implementation handles backend-specific log CDF methods:
- NumPy: Uses SciPy's logcdf methods when available
- PyTorch: Uses log_cdf methods or log(cdf) fallback
Example:
>>> # Extreme tail probability
>>> normal_param = Normal(mu=0, sigma=1)
>>> log_tail_prob = normal_param.log_cdf(x=extreme_values)
"""
SCIPY_FUNC = "logcdf" # The SciPy function for the log CDF
STAN_SUFFIX = "lcdf" # The suffix for the Stan operation
TORCH_FUNC = "log_cdf"
def run_np_torch_op(self, **draws):
r"""Execute log CDF computation with appropriate numerical handling.
:param draws: Parameter draws for the computation
:type draws: dict
:returns: Log CDF values :math:`\log(P(X \leq x))`
:rtype: Union[np.ndarray, torch.Tensor]
:raises TypeError: If unsupported output type is encountered
"""
# As above, get the output of the parent method and return it directly
# if using numpy.
output = super().run_np_torch_op(**draws)
if isinstance(output, np.ndarray):
return output
# If using torch, return the log CDF
elif isinstance(output, torch.Tensor):
if hasattr(self.__class__.PARAMETER.TORCH_DIST, self.__class__.TORCH_FUNC):
return output
return torch.log(output)
# If using an unsupported type, raise an error
else:
raise TypeError(
f"Unsupported module {type(output)} for log CDF operation. "
"Expected numpy or torch."
)
class LogSurvivalFunction(CDFLike):
r"""Logarithmic survival function transformation.
Computes :math:`\log(P(X > x)) = \log(1 - F(x))` for numerical stability
when dealing with survival probabilities that may be very close
to zero or one. Essential for stable survival analysis computations.
:param x: Values at which to evaluate the log survival function
:type x: custom_types.CombinableParameterType
:param shape: Shape of the output. Defaults to ().
:type shape: Union[tuple[custom_types.Integer, ...], custom_types.Integer]
:param params: Distribution parameters
:type params: custom_types.CombinableParameterType
Mathematical Definition:
.. math::
\log S(x) = \log(P(X > x)) = \log(1 - F(x))
Where :math:`F(x)` is the CDF.
Numerical Advantages:
- Prevents underflow for probabilities near 0 or 1
- Maintains precision for extreme survival times
- Enables stable log-space arithmetic
- Critical for numerical stability in survival models
Common Applications:
- Survival analysis with extreme event times
- Reliability engineering with high reliability systems
- Hazard modeling with rare failure events
- Log-likelihood computations for survival models
The implementation provides numerically stable computation:
- NumPy: Uses SciPy's logsf methods for direct computation
- PyTorch: Uses log_sf methods or :math:`\text{log1p}(-cdf)` as fallback
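Example:
>>> # Illustrative direct use of the generated LOG_SF attribute; the
>>> # Normal parameterization (mu, sigma) is assumed
>>> log_surv_prob = Normal.LOG_SF(x=extreme_values, mu=0.0, sigma=1.0)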
"""
SCIPY_FUNC = "logsf" # The SciPy function for the log survival function
STAN_SUFFIX = "lccdf" # The suffix for the Stan operation
TORCH_FUNC = "log_sf"
def run_np_torch_op(self, **draws):
r"""Execute log survival function computation with numerical stability.
:param draws: Parameter draws for the computation
:type draws: dict
:returns: Log survival function values :math:`\log(P(X > x))`
:rtype: Union[np.ndarray, torch.Tensor]
:raises TypeError: If unsupported output type is encountered
This method ensures numerical stability by:
- Using native log survival function methods when available
- Using :math:`\text{log1p}(-cdf)` for PyTorch when direct methods unavailable
- Handling precision issues near probability boundaries
"""
# Get the output of the parent method
output = super().run_np_torch_op(**draws)
# If using numpy, return the log survival function directly
if isinstance(output, np.ndarray):
return output
# If using torch, return the log of 1 minus the CDF
elif isinstance(output, torch.Tensor):
if hasattr(self.__class__.PARAMETER.TORCH_DIST, self.__class__.TORCH_FUNC):
return output
return torch.log1p(-output)
else:
raise TypeError(
f"Unsupported module {type(output)} for log survival function operation. "
"Expected numpy or torch."
)