Source code for vivainsights.xicor
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module calculates the chatterjee coefficient for a given metric.
"""
import numpy as np
from scipy.stats import rankdata
[docs]
def xicor(x, y, ties=True):
"""
Name
-----
xicor
Description
------------
Calculate Chatterjee's Rank Correlation Coefficient, a measure of association
between two variables, useful for identifying monotonic relationships.
Parameters
-----------
x : array-like
A numeric array representing the independent variable.
y : array-like
A numeric array representing the dependent variable.
ties : bool
A boolean indicating whether to handle ties in the data.
Default is True.
Returns
---------
float: A numeric value representing Chatterjee's rank correlation coefficient.
Raises
-------
ValueError: If the lengths of X and Y are not the same.
Examples:
>>> X = [1, 2, 3, 4, 5]
>>> Y = [2, 1, 4, 3, 5]
>>> xicor(X, Y)
"""
n = len(x)
if n != len(y):
raise ValueError("The length of x and y must be the same.")
# Sort Y based on the order of X
ordered_Y = np.array(y)[np.argsort(x)]
# Get the ranks of Y after sorting by X
r = rankdata(ordered_Y, method='max' if ties else 'ordinal')
if ties:
# Handling ties: Use maximum rank for tied values
l = rankdata(ordered_Y, method='max')
# Calculate Chatterjee's coefficient with ties
return 1 - n * np.sum(np.abs(np.diff(r))) / (2 * np.sum(l * (n - l)))
else:
# No ties: Simplified formula for the Chatterjee coefficient
return 1 - 3 * np.sum(np.abs(np.diff(r))) / (n**2 - 1)