Source code for vivainsights.identify_inactiveweeks

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
The function `identify_inactiveweeks` identifies weeks where collaboration hours are more than a
specified number of standard deviations below the mean and returns the result in the specified
format.
"""
import pandas as pd
from vivainsights.create_bar import create_bar_calc

[docs] def identify_inactiveweeks(data: pd.DataFrame, sd=2, return_type="text"): """ Name ---- identify_inactiveweeks Description ----------- The function `identify_inactiveweeks` identifies weeks where collaboration hours are more than a specified number of standard deviations below the mean and returns the result in the specified format. Parameters ---------- data : pandas dataframe The `data` parameter is a pandas DataFrame that contains the following columns: sd : int The `sd` parameter stands for the number of standard deviations below the mean that is considered as inactive. In this code, it is used to identify weeks where the collaboration hours are more than `sd` standard deviations below the mean, defaults to 2 (optional) return_type : str The `return_type` parameter determines the type of output that the function will return. It can have the following values:, defaults to text (optional) - 'text': Returns a string with the number of inactive weeks. - 'data_dirty' or 'dirty_data': Returns a Pandas DataFrame with the rows that are inactive. - 'data_cleaned' or 'cleaned_data': Returns a Pandas DataFrame with the rows that are not inactive. - 'plot': Returns a plot showing the number of inactive weeks for each user. - 'data': Returns a Pandas DataFrame with the number of inactive weeks for each user. The default value is 'text'. Returns ------- The function `identify_inactiveweeks` returns different outputs based on the value of the `return_type` parameter. """ # Z score calculation data['z_score'] = (data['Collaboration_hours'] - data.groupby('PersonId')['Collaboration_hours'].transform('mean')) / data.groupby('PersonId')['Collaboration_hours'].transform('std') Calc = data[data["z_score"] <= -sd][["PersonId", "MetricDate", "z_score"]].reset_index(drop=True) # standard deviations below the mean data['Total'] = 'Total' result = create_bar_calc(data, metric='Collaboration_hours', hrvar='Total') collab_hours = result['metric'].round(1).to_frame()["metric"][0] # output when return_type is text message = f"There are {Calc.shape[0]} rows of data with weekly collaboration hours more than {sd} standard deviations below the mean {collab_hours}." # Output conditions based on return_type if return_type == "text": return message elif return_type == "data_dirty" or return_type == "dirty_data": return data[data["z_score"] <= -sd].drop(columns=["z_score"]) elif return_type == "data_cleaned" or return_type == "cleaned_data": return data[data["z_score"] > -sd].drop(columns=["z_score"]) elif return_type == "data": return data.assign(inactiveweek=(data["z_score"] <= -sd)).drop(columns=["z_score"]) else: raise ValueError("Error: please check inputs for `return_type`")