# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module identifies and counts the number of employees who have churned from the dataset.
This is done by checking whether an employee who is present in the first `n1` weeks of the data
is also present in the last `n2` weeks of the data.
An additional use case of this function is to identify "new joiners" by using the argument `flip`.
"""
import pandas as pd
def identify_churn(
    data: pd.DataFrame,
    n1: int = 6,
    n2: int = 6,
    return_type: str = "message",  # `return` is a keyword, hence `return_type`
    flip: bool = False,
    date_column: str = "MetricDate",
    date_format: str = "%Y-%m-%d",
):
    """
    Name
    ----
    identify_churn

    Description
    -----------
    Identify and count the employees who have churned from the dataset: those
    who appear in the first `n1` distinct dates of the data but not in the
    last `n2` distinct dates. With `flip=True` the comparison is reversed and
    "new joiners" (present in the last window, absent from the first) are
    identified instead.

    The input dataframe is not modified.

    Parameters
    ---------
    data : pandas dataframe
        Data to analyse. Must contain a `PersonId` column and the column
        named by `date_column`.
    n1 : int
        Number of earliest distinct dates (weeks) that form the first window.
        Must be at least 1.
    n2 : int
        Number of latest distinct dates (weeks) that form the last window.
        Must be at least 1.
    return_type : str
        Type of return expected: "message", "text" or "data".
    flip : boolean
        Flag to switch between identifying churned users (False) and
        new users (True).
    date_column : str
        Name of the date column on which churn is calculated,
        defaults to "MetricDate".
    date_format : str
        Format of the dates in `date_column`, passed to `pandas.to_datetime`;
        defaults to "%Y-%m-%d".

    Returns
    -------
    A different output is returned depending on the value passed to the
    `return_type` argument:

    - "message": None. A diagnostic message is printed to the console.
    - "text": String. The diagnostic message.
    - "data": Set containing the `PersonId` of the employees who have been
      identified as churned (or as new joiners when `flip=True`).

    Raises
    ------
    ValueError
        If `return_type` or `flip` is not a supported value, if `n1` or `n2`
        is smaller than 1, or if `date_column` holds no valid dates.
    """
    # Validate the cheap arguments first so that a bad call fails before any
    # data is processed.
    if return_type not in ("message", "text", "data"):
        raise ValueError(
            "Invalid `return_type`: expected 'message', 'text' or 'data'"
        )
    # Membership uses `==`, so 0/1 and numpy booleans keep working as before.
    if flip not in (True, False):
        raise ValueError("Invalid argument for `flip`")
    if n1 < 1 or n2 < 1:
        raise ValueError("`n1` and `n2` must both be at least 1")

    # Parse into a local Series: assigning the result back into `data` would
    # silently change the caller's dataframe.
    dates = pd.to_datetime(data[date_column], format=date_format)

    # A sorted DatetimeIndex always yields Timestamp objects, regardless of
    # whether `Series.unique()` returns a NumPy array or a DatetimeArray.
    unique_dates = pd.DatetimeIndex(dates.dropna().unique()).sort_values()
    if unique_dates.empty:
        raise ValueError(f"No valid dates found in column '{date_column}'")

    # Sort once, then slice both windows from the same ordered index.
    first_window = unique_dates[:n1]
    last_window = unique_dates[-n2:]

    first_ids = set(data.loc[dates.isin(first_window), "PersonId"].unique())
    last_ids = set(data.loc[dates.isin(last_window), "PersonId"].unique())

    first_start = first_window.min().date()
    first_end = first_window.max().date()
    last_start = last_window.min().date()
    last_end = last_window.max().date()

    if flip:
        # New joiners: in the last window, not in the first.
        person_ids = last_ids - first_ids
        message = (
            f"New joiners:\nThere are {len(person_ids)} employees from "
            f"{last_start} to {last_end} "
            f"({n2} weeks) who were not present in "
            f"{first_start} to {first_end} "
            f"({n1} weeks)."
        )
    else:
        # Churners: in the first window, not in the last.
        person_ids = first_ids - last_ids
        message = (
            f"Churn:\nThere are {len(person_ids)} employees from "
            f"{first_start} to {first_end} "
            f"({n1} weeks) who are no longer present in "
            f"{last_start} to {last_end} "
            f"({n2} weeks)."
        )

    if return_type == "text":
        return message
    if return_type == "data":
        return person_ids
    # return_type == "message"
    print(message)
    return None