# Source code for vivainsights.xicor
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
Calculate the Chatterjee (xi) correlation coefficient for a given metric.
"""
__all__ = ['xicor']
import numpy as np
from scipy.stats import rankdata
# [docs]
def xicor(x, y, ties=True):
    """
    Calculate Chatterjee's rank correlation coefficient (xi).

    The coefficient measures how strongly ``y`` depends on ``x``. Values near
    0 indicate no dependence and values near 1 indicate that ``y`` is close to
    being a function of ``x``; the relationship does not need to be linear or
    monotonic. The measure is not symmetric: ``xicor(x, y)`` and
    ``xicor(y, x)`` generally differ. Small samples can yield slightly
    negative values.

    Parameters
    ----------
    x : array-like
        Numeric array representing the independent variable.
    y : array-like
        Numeric array representing the dependent variable.
    ties : bool, optional
        If ``True`` (default), use the general formula that remains valid
        when ``y`` contains tied values. If ``False``, assume ``y`` has no
        ties and use the simplified formula (ties, if any, are ranked in
        order of appearance).

    Returns
    -------
    float
        Chatterjee's rank correlation coefficient. ``nan`` is returned when
        the coefficient is undefined: fewer than two observations, or
        (with ``ties=True``) a constant ``y``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` have different lengths.

    Examples
    --------
    Compute the correlation with tied values handled (default):

    >>> from vivainsights import xicor
    >>> X = [1, 2, 3, 4, 5]
    >>> Y = [2, 1, 4, 3, 5]
    >>> float(xicor(X, Y))
    0.125

    Disable tie correction:

    >>> float(xicor(X, Y, ties=False))
    0.125
    """
    n = len(x)
    if n != len(y):
        raise ValueError("The length of x and y must be the same.")

    # With fewer than two points there are no successive rank differences,
    # so the coefficient is undefined.
    if n < 2:
        return np.nan

    # Arrange y in the order of increasing x.
    ordered_y = np.asarray(y)[np.argsort(x)]

    if not ties:
        # No ties: ranks are a permutation of 1..n and the denominator
        # collapses to the closed form (n^2 - 1) / 3.
        r = rankdata(ordered_y, method='ordinal')
        # Accumulate in float64 so large inputs cannot overflow int64.
        rank_jumps = np.abs(np.diff(r)).sum(dtype=np.float64)
        return 1 - 3 * rank_jumps / (n**2 - 1)

    # r_i = number of j with y_j <= y_i  (the 'max' rank).
    r = rankdata(ordered_y, method='max')
    # l_i = number of j with y_j >= y_i. The 'min' rank minus one counts the
    # strictly smaller values, so l_i = n - (min_rank_i - 1). This avoids
    # negating y, which would fail for boolean or unsigned data.
    strictly_smaller = rankdata(ordered_y, method='min') - 1
    l = n - strictly_smaller

    rank_jumps = np.abs(np.diff(r)).sum(dtype=np.float64)
    # n - l_i equals strictly_smaller_i.
    denominator = 2 * np.sum(l * strictly_smaller, dtype=np.float64)

    # A constant y makes every l_i equal to n, so the denominator vanishes
    # and the coefficient is undefined.
    if denominator == 0:
        return np.nan

    return 1 - n * rank_jumps / denominator