Source code for vivainsights.identify_churn
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
Identify and count employees who have churned from or joined the dataset.
This is done by checking whether an employee who is present in the first `n1` weeks of the data
is also present in the last `n2` weeks of the data.
An additional use case of this function is the ability to identify "new-joiners" by using the argument `flip`.
"""
__all__ = ['identify_churn']
import pandas as pd
[docs]
def identify_churn(data: pd.DataFrame,
                   n1: int = 6,
                   n2: int = 6,
                   return_type: str = "message",  # avoid using return as a variable name
                   flip: bool = False,
                   date_column: str = "MetricDate",
                   date_format: str = "%Y-%m-%d"):
    """Identify employees who have churned from or joined the dataset.

    Measures whether employees present in the first *n1* distinct dates are
    still present in the last *n2* distinct dates. Set ``flip=True`` to
    identify new joiners instead of churned employees.

    The input ``data`` is not modified: dates are parsed into a temporary
    series rather than written back to *date_column*.

    Parameters
    ----------
    data : pandas.DataFrame
        Person query data. Must contain a ``PersonId`` column and the column
        named by *date_column*.
    n1 : int, default 6
        Number of initial weeks (distinct dates) to check for presence.
    n2 : int, default 6
        Number of final weeks (distinct dates) to check for presence.
    return_type : str, default "message"
        ``"message"`` prints a diagnostic, ``"text"`` returns it as a
        string, ``"data"`` returns the set of matching ``PersonId`` values.
    flip : bool, default False
        If ``True``, identify new joiners rather than churned employees.
    date_column : str, default "MetricDate"
        Name of the date column.
    date_format : str, default "%Y-%m-%d"
        ``strftime`` format of dates in *date_column*.

    Returns
    -------
    None, str, or set
        ``None`` (after printing the message), the diagnostic string, or a
        set of ``PersonId`` values, depending on *return_type*.

    Raises
    ------
    ValueError
        If *return_type* or *flip* is not a supported value, or if either
        boundary window contains no dates (for example, empty data or a
        window size of 0).

    Examples
    --------
    Return a diagnostic text summary:

    >>> import vivainsights as vi
    >>> pq_data = vi.load_pq_data()
    >>> vi.identify_churn(pq_data, return_type="text")

    Return the set of churned PersonIds:

    >>> vi.identify_churn(pq_data, return_type="data")

    Flip the logic to detect employees who appear only in later weeks:

    >>> vi.identify_churn(pq_data, flip=True, return_type="text")

    Customize the number of boundary weeks to compare:

    >>> vi.identify_churn(pq_data, n1=3, n2=3, return_type="text")
    """
    # Validate the cheap arguments up front so bad input fails before any work.
    valid_return_types = ("message", "text", "data")
    if return_type not in valid_return_types:
        raise ValueError(
            f"Invalid `return_type`: {return_type!r}. "
            f"Expected one of {valid_return_types}."
        )
    # Membership uses ``==``, so the same values the original equality checks
    # accepted (bools, NumPy bools, 0/1) are still accepted here.
    if flip not in (True, False):
        raise ValueError("Invalid argument for `flip`")

    # Parse into a local series; assigning back to ``data`` would silently
    # change the caller's DataFrame.
    dates = pd.to_datetime(data[date_column], format=date_format)

    # DatetimeIndex guarantees Timestamp elements (which have ``.date()``)
    # regardless of what ``Series.unique()`` returns in the installed pandas.
    # Missing dates cannot be ordered, so they are excluded from the windows.
    unique_dates = pd.DatetimeIndex(dates.dropna().unique()).sort_values()

    # First and last n distinct dates (sorted once, sliced from either end).
    first_dates = unique_dates[:n1]
    last_dates = unique_dates[::-1][:n2]

    if len(first_dates) == 0 or len(last_dates) == 0:
        raise ValueError(
            "No dates available in the first/last window: check that `data` "
            "is not empty and that `n1` and `n2` are non-zero."
        )

    # People observed in each boundary window. Positional masks avoid any
    # index-alignment surprises when ``data`` has a non-unique index.
    in_first = dates.isin(first_dates).to_numpy()
    in_last = dates.isin(last_dates).to_numpy()
    first_people = set(data.loc[in_first, "PersonId"].unique())
    last_people = set(data.loc[in_last, "PersonId"].unique())

    first_start, first_end = first_dates.min().date(), first_dates.max().date()
    last_start, last_end = last_dates.min().date(), last_dates.max().date()

    if flip:
        # New joiners: in the last window, not in the first.
        churner_id = last_people - first_people
        message = (f"New joiners:\nThere are {len(churner_id)} employees from "
                   f"{last_start} to {last_end} "
                   f"({n2} weeks) who were not present in "
                   f"{first_start} to {first_end} "
                   f"({n1} weeks).")
    else:
        # Churners: in the first window, not in the last.
        churner_id = first_people - last_people
        message = (f"Churn:\nThere are {len(churner_id)} employees from "
                   f"{first_start} to {first_end} "
                   f"({n1} weeks) who are no longer present in "
                   f"{last_start} to {last_end} "
                   f"({n2} weeks).")

    if return_type == "message":
        print(message)
        return None
    if return_type == "text":
        return message
    return churner_id