Source code for vivainsights.keymetrics_scan

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

"""
This module generates a summary of key metrics, with options to return a heatmap or a summary table.
"""

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from vivainsights.extract_date_range import extract_date_range
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import Normalize
from matplotlib.lines import Line2D
from vivainsights.us_to_space import us_to_space


[docs]
def keymetrics_scan(data,
                    hrvar="Organization",
                    mingroup=5,
                    metrics=["Workweek_span",
                             "Collaboration_hours",
                             "After_hours_collaboration_hours",
                             "Meetings",
                             "Meeting_hours",
                             "After_hours_meeting_hours",
                             "Low_quality_meeting_hours",
                             "Meeting_hours_with_manager_1_on_1",
                             "Meeting_hours_with_manager",
                             "Emails_sent",
                             "Email_hours",
                             "After_hours_email_hours",
                             "Generated_workload_email_hours",
                             "Total_focus_hours",
                             "Internal_network_size",
                             "Networking_outside_organization",
                             "External_network_size",
                             "Networking_outside_company"],
                    return_type="plot",
                    low_color="#4169E1",
                    mid_color="#F1CC9E",
                    high_color="#D8182A",
                    textsize=10,
                    plot_row_scaling_factor=0.8):
    """
    Name
    ----
    keymetrics_scan

    Description
    ------------
    Generate a summary of key metrics with options to return a heatmap or a summary table.

    Each metric is first averaged per person (`PersonId`) within each group of
    `hrvar`, and those person-level averages are then averaged per group. The
    number of distinct `PersonId` values per group is reported as
    `Employee_Count`, and groups below `mingroup` are dropped.

    Parameters
    ----------
    data : pandas.DataFrame
        A Person Query dataset in the form of a pandas dataframe. Must contain
        a `PersonId` column and the column named by `hrvar`. The input
        dataframe is not modified.
    hrvar : str, optional
        The column name to group the data by. Defaults to `"Organization"`.
        If `None`, all rows are treated as a single group named `"Total"`.
    mingroup : int, optional
        The minimum number of employees required to include a group in the analysis. Defaults to `5`.
    metrics : list of str, optional
        A list of metric column names to calculate averages for. Names that
        are not columns of `data` are silently ignored. Defaults to:
        - `"Workweek_span"`
        - `"Collaboration_hours"`
        - `"After_hours_collaboration_hours"`
        - `"Meetings"`
        - `"Meeting_hours"`
        - `"After_hours_meeting_hours"`
        - `"Low_quality_meeting_hours"`
        - `"Meeting_hours_with_manager_1_on_1"`
        - `"Meeting_hours_with_manager"`
        - `"Emails_sent"`
        - `"Email_hours"`
        - `"After_hours_email_hours"`
        - `"Generated_workload_email_hours"`
        - `"Total_focus_hours"`
        - `"Internal_network_size"`
        - `"Networking_outside_organization"`
        - `"External_network_size"`
        - `"Networking_outside_company"`
    return_type : str, optional
        Specifies the type of output to return. Valid values are:
        - `"plot"` (default): Generate a heatmap visualization.
        - `"table"`: Return a summary table as a pandas DataFrame.
    low_color : str, optional
    mid_color : str, optional
    high_color : str, optional
        Color codes for low, mid, and high values in the heatmap. Defaults to:
        - `low_color="#4169E1"` (blue)
        - `mid_color="#F1CC9E"` (beige)
        - `high_color="#D8182A"` (red)
        Other hex codes that can be used include:
        - `Black: "#000000"`
        - `White: "#FFFFFF"`
        - `Red: "#FF0000"`
        - `Lime: "#00FF00"`
        - `Blue: "#0000FF"`
        - `Yellow: "#FFFF00"`
        - `Cyan/Aqua: "#00FFFF"`
        - `Magenta/Fuchsia: "#FF00FF"`
        - `Silver: "#C0C0C0"`
        - `Gray: "#808080"`
        - `Maroon: "#800000"`
        - `Olive: "#808000"`
        - `Green: "#008000"`
        - `Purple: "#800080"`
        - `Teal: "#008080"`
        - `Navy: "#000080"`
        - `Light Gray: "#D3D3D3"`
        - `Dark Gray: "#A9A9A9"`
        - `Dim Gray: "#696969"`
        - `Slate Gray: "#708090"`
        - `Light Slate Gray: "#778899"`
        - `Crimson: "#DC143C"`
        - `Coral: "#FF7F50"`
        - `Tomato: "#FF6347"`
        - `Orange: "#FFA500"`
        - `Gold: "#FFD700"`
        - `Dark Orange: "#FF8C00"`
        - `Light Salmon: "#FFA07A"`
        - `Dodger Blue: "#1E90FF"`
        - `Sky Blue: "#87CEEB"`
        - `Steel Blue: "#4682B4"`
        - `Light Blue: "#ADD8E6"`
        - `Dark Blue: "#00008B"`
        - `Medium Blue: "#0000CD"`
        - `Royal Blue: "#4169E1"`
        - `Sienna: "#A0522D"`
        - `Saddle Brown: "#8B4513"`
        - `Chocolate: "#D2691E"`
        - `Peru: "#CD853F"`
        - `Sandy Brown: "#F4A460"`
        - `Tan: "#D2B48C"`
        - `Lavender: "#E6E6FA"`
        - `Thistle: "#D8BFD8"`
        - `Plum: "#DDA0DD"`
        - `Orchid: "#DA70D6"`
        - `Peach Puff: "#FFDAB9"`
        - `Mint Cream: "#F5FFFA"`
        - `Forest Green: "#228B22"`
        - `Sea Green: "#2E8B57"`
        - `Medium Sea Green: "#3CB371"`
        - `Spring Green: "#00FF7F"`
        - `Pale Green: "#98FB98"`
        - `Indian Red: "#CD5C5C"`
        - `Rosy Brown: "#BC8F8F"`
        - `Hot Pink: "#FF69B4"`
        - `Deep Pink: "#FF1493"`
        - `Light Pink: "#FFB6C1"`
        - `Midnight Blue: "#191970"`
        - `Cornflower Blue: "#6495ED"`
        - `Powder Blue: "#B0E0E6"`
        - `Light Sky Blue: "#87CEFA"`
    textsize : int, optional
        Font size for the metric labels in the heatmap. Defaults to `10`.
    plot_row_scaling_factor : float, optional
        Figure height allotted to each heatmap row; the figure height is
        `plot_row_scaling_factor * number_of_rows`. Defaults to `0.8`.

    Returns
    -------
    - If `return_type="plot"`: Returns a matplotlib figure containing a heatmap of the key metrics
      grouped by the specified HR attribute. Colors are rescaled within each metric row; the cell
      annotations show the unscaled averages.
    - If `return_type="table"`: Returns a pandas DataFrame containing a summary table of average metric values grouped by the specified HR attribute.

    Raises
    ------
    ValueError
        - If `return_type` is neither `"plot"` nor `"table"`.
        - If none of the specified metrics are present in the dataset.
        - If no data is available after applying the `mingroup` filter.

    Examples
    --------
    >>> import vivainsights as vi
    >>> pq_data = vi.load_pq_data()
    >>> vi.keymetrics_scan(data=pq_data, hrvar="Organization", mingroup=10, return_type="table")
    # Returns a summary table grouped by "Organization" with a minimum group size of 10.

    >>> vi.keymetrics_scan(data=pq_data, hrvar="Organization", metrics=["Workweek_span", "Meeting_hours"], return_type="plot")
    # Returns a heatmap of the "Workweek_span" and "Meeting_hours" metrics grouped by "Organization".

    >>> vi.keymetrics_scan(data=pq_data, low_color="#4169E1", mid_color="#F1CC9E", high_color="#D8182A", textsize=12)
    # Generates a heatmap using the low, mid and high color palette with font size 12.

    """
    # Fail fast on a bad `return_type` instead of after all the aggregation work.
    if return_type not in ("plot", "table"):
        raise ValueError("Invalid value for `return_type`. Choose either 'plot' or 'table'.")

    # Default group handling. `assign` returns a new frame, so the caller's
    # dataframe does not silently gain a 'Total' column.
    if hrvar is None:
        data = data.assign(Total='Total')
        hrvar = 'Total'

    # Filter metrics present in the data (rebinds the name; the default list
    # object itself is never mutated).
    metrics = [metric for metric in metrics if metric in data.columns]
    if not metrics:
        raise ValueError("None of the specified metrics are present in the dataset.")

    # Compute summary table: person-level averages first, then group-level
    # averages, so people with more rows do not dominate their group.
    summary_table = (
        data.groupby([hrvar, 'PersonId'])[metrics]
        .mean()
        .groupby(hrvar)
        .mean()
        .reset_index()
    )

    # Add employee count
    employee_count = (
        data.groupby(hrvar)['PersonId']
        .nunique()
        .reset_index()
        .rename(columns={"PersonId": "Employee_Count"})
    )

    summary_table = summary_table.merge(employee_count, on=hrvar)
    summary_table = summary_table[summary_table['Employee_Count'] >= mingroup]
    if summary_table.empty:
        raise ValueError(
            "No data available after applying the `mingroup` filter. "
            "Consider lowering `mingroup`."
        )

    if return_type == "table":
        return summary_table

    # Melt the summary table for visualization (one heatmap row per variable,
    # including Employee_Count).
    summary_long = summary_table.melt(id_vars=[hrvar], var_name="variable", value_name="value")

    # Prepare the heatmap with row-wise normalization
    variables = summary_long['variable'].unique()
    num_vars = len(variables)
    hrvar_categories = summary_long[hrvar].unique()
    cap_str = extract_date_range(data, return_type='text')

    title_text = "Key Metrics - Weekly Average"
    subtitle_text = f"By {hrvar.replace('_', ' ')}"

    fig, axes = plt.subplots(num_vars, 1, figsize=(10, plot_row_scaling_factor * num_vars), sharex=True)

    # The gradient is the same for every metric row, so build it once.
    custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", [low_color, mid_color, high_color])

    for i, variable in enumerate(variables):
        # plt.subplots returns a bare Axes (not an array) when there is one row.
        ax = axes[i] if num_vars > 1 else axes
        subset = summary_long[summary_long['variable'] == variable]
        values = subset['value'].values

        if variable == "Employee_Count":
            # Counts are shown as plain numbers on a white background.
            row_data = pd.DataFrame([values], columns=hrvar_categories)
            light_gray_cmap = LinearSegmentedColormap.from_list("gray_cmap", ["#FFFFFF", "#FFFFFF"])
            sns.heatmap(row_data,
                        annot=values.reshape(1, -1),
                        fmt=".0f",
                        cmap=light_gray_cmap,
                        cbar=False,
                        linewidths=0.5,
                        vmin=0,
                        vmax=1,
                        yticklabels=False,
                        ax=ax)
        else:
            row_min = subset['value'].min()
            row_max = subset['value'].max()
            value_range = row_max - row_min
            if value_range > 0:
                normalized_values = (values - row_min) / value_range
            else:
                # Single group or identical values: 0/0 would give NaN, which
                # the heatmap masks (blank cell, no annotation). Use the mid
                # color instead, keeping genuinely missing values as NaN.
                normalized_values = np.where(pd.isna(values), np.nan, 0.5)
            norm_data = pd.DataFrame([normalized_values], columns=hrvar_categories)

            sns.heatmap(norm_data,
                        annot=values.reshape(1, -1),
                        fmt=".1f",
                        cmap=custom_cmap,
                        cbar=False,
                        linewidths=0.5,
                        vmin=0,
                        vmax=1,
                        yticklabels=False,
                        ax=ax)

        # Only the top row carries the group labels.
        if i == 0:
            ax.xaxis.tick_top()
            ax.tick_params(axis='x', labeltop=True, labelbottom=False, labelrotation=45, pad=10)
        else:
            ax.tick_params(axis='x', bottom=False, labelbottom=False)

        ax.set_ylabel(us_to_space(variable), fontsize=textsize, rotation=0, labelpad=5, ha="right")
        ax.tick_params(left=False)

    # Title, subtitle and accent rule are placed in figure coordinates.
    fig.text(0.01, 0.995, title_text, fontsize=16, weight='bold', ha='left', va='top')
    fig.text(0.01, 0.905, subtitle_text, fontsize=12, ha='left', va='top', alpha=0.85)

    line = Line2D([0.01, 1.0], [0.85, 0.85], transform=fig.transFigure,
                  color='#fe7f4f', linewidth=1.2, clip_on=False)
    fig.add_artist(line)

    rect = plt.Rectangle((0.01, 0.85), 0.03, -0.015,
                         transform=fig.transFigure,
                         facecolor='#fe7f4f',
                         clip_on=False,
                         linewidth=0)
    fig.add_artist(rect)

    fig.text(0.01, 0.01, cap_str, ha='left', fontsize=9, alpha=0.7)

    # Reserve the top 15% for the header and a bottom strip for the caption.
    plt.tight_layout(rect=[0, 0.03, 1, 0.85])
    return fig