# Source code for vivainsights.identify_churn
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module identifies and counts the number of employees who have churned from the dataset.
This is done by measuring whether an employee who is present in the first `n` (n1) weeks of the data,
is also present in the last `n` (n2) weeks of the data.
An additional use case of this function is the ability to identify "new-joiners" by using the argument `flip`.
"""
import pandas as pd
# [docs]
def identify_churn(data: pd.DataFrame,
                   n1: int = 6,
                   n2: int = 6,
                   return_type: str = "message", # avoid using return as a variable name
                   flip: bool = False,
                   date_column: str = "MetricDate",
                   date_format: str = "%Y-%m-%d"):
    """
    Name
    ----
    identify_churn

    Description
    -----------
    Identify and count the employees who have churned from the dataset, i.e.
    those who appear in the first `n1` distinct dates of the data but not in
    the last `n2` distinct dates. With `flip=True` the comparison is reversed
    and "new joiners" are identified instead (present in the last `n2` dates
    but not in the first `n1`).

    The input dataframe is not modified: dates are parsed into a local copy.

    Parameters
    ----------
    data : pandas dataframe
        Data to analyse. Must contain a `PersonId` column and the column named
        by `date_column`.
    n1 : int
        First `n` weeks (distinct dates) of data to check for the person's presence.
    n2 : int
        Last `n` weeks (distinct dates) of data to check for the person's presence.
    return_type : str
        Type of return expected: "message", "text" or "data".
    flip : boolean
        Flag to switch between identifying churned users (False) vs new users (True).
    date_column : str
        Date column based on which churn is calculated, defaults to "MetricDate".
    date_format : str
        Format string passed to `pandas.to_datetime` to parse `date_column`,
        defaults to "%Y-%m-%d".

    Returns
    -------
    A different output is returned depending on the value passed to the `return_type` argument:
    - "message": None. The diagnostic message is printed to the console.
    - "text": String. The diagnostic message.
    - "data": Set containing the `PersonId` of the employees identified as
      churned (or as new joiners when `flip` is True).

    Raises
    ------
    ValueError
        If there are no dates in the first `n1` or last `n2` window (for
        example empty data, or `n1`/`n2` equal to 0), if `flip` is not a
        boolean value, or if `return_type` is not one of the supported values.
    """
    # Parse into a local Series instead of assigning back to `data`, so the
    # caller's dataframe is left untouched.
    parsed_dates = pd.to_datetime(data[date_column], format=date_format)

    # Distinct dates in ascending order. Staying within pandas (rather than
    # calling sorted() on `.unique()`) guarantees Timestamp elements whatever
    # array type `.unique()` returns, and missing dates (NaT) are excluded so
    # they cannot end up in either window.
    ordered_dates = parsed_dates.dropna().drop_duplicates().sort_values()

    # First and last n weeks (the last window is taken from the descending order)
    firstnweeks = ordered_dates.iloc[:n1]
    lastnweeks = ordered_dates.iloc[::-1].iloc[:n2]

    if firstnweeks.empty or lastnweeks.empty:
        raise ValueError(
            "No dates available in the first `n1` or last `n2` weeks; "
            "check that `data` is not empty and that `n1` and `n2` are at least 1."
        )

    # People present in the first n1 weeks
    first_peeps = data.loc[parsed_dates.isin(firstnweeks), 'PersonId'].unique()

    # People present in the last n2 weeks
    final_peeps = data.loc[parsed_dates.isin(lastnweeks), 'PersonId'].unique()

    # Membership uses `==`, so exactly the values that satisfied the previous
    # `flip == False` / `flip == True` comparisons are accepted.
    if flip not in (True, False):
        raise ValueError("Invalid argument for `flip`")

    first_start, first_end = firstnweeks.min().date(), firstnweeks.max().date()
    last_start, last_end = lastnweeks.min().date(), lastnweeks.max().date()

    if not flip:
        # In first, not in last
        churner_id = set(first_peeps) - set(final_peeps)

        # Message
        printMessage = (f"Churn:\nThere are {len(churner_id)} employees from "
                        f"{first_start} to {first_end} "
                        f"({n1} weeks) who are no longer present in "
                        f"{last_start} to {last_end} "
                        f"({n2} weeks).")
    else:
        # In last, not in first: new joiners
        churner_id = set(final_peeps) - set(first_peeps)

        # Message
        printMessage = (f"New joiners:\nThere are {len(churner_id)} employees from "
                        f"{last_start} to {last_end} "
                        f"({n2} weeks) who were not present in "
                        f"{first_start} to {first_end} "
                        f"({n1} weeks).")

    if return_type == "message":
        print(printMessage)
    elif return_type == "text":
        return printMessage
    elif return_type == "data":
        return churner_id
    else:
        raise ValueError("Invalid `return`")