# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This script generates a summary of key metrics, with options to return a heatmap or a summary table.
"""
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from vivainsights.extract_date_range import extract_date_range
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.colors import Normalize
from matplotlib.lines import Line2D
# [docs]  (Sphinx "view source" link artifact; not part of the module's code)
def keymetrics_scan(data,
                    hrvar="Organization",
                    mingroup=5,
                    metrics=["Workweek_span",
                             "Collaboration_hours",
                             "After_hours_collaboration_hours",
                             "Meetings",
                             "Meeting_hours",
                             "After_hours_meeting_hours",
                             "Low_quality_meeting_hours",
                             "Meeting_hours_with_manager_1_on_1",
                             "Meeting_hours_with_manager",
                             "Emails_sent",
                             "Email_hours",
                             "After_hours_email_hours",
                             "Generated_workload_email_hours",
                             "Total_focus_hours",
                             "Internal_network_size",
                             "Networking_outside_organization",
                             "External_network_size",
                             "Networking_outside_company"],
                    return_type="plot",
                    low_color="#4169E1",
                    mid_color="#F1CC9E",
                    high_color="#D8182A",
                    textsize=10,
                    plot_row_scaling_factor=0.8):
    """
    Name
    ----
    keymetrics_scan

    Description
    ------------
    Generate a summary of key metrics with options to return a heatmap or a summary table.

    Each metric is first averaged per person within each group, and those
    per-person averages are then averaged per group. The number of distinct
    `PersonId` values per group is added as `Employee_Count`, and groups
    with fewer than `mingroup` people are dropped.

    Parameters
    ----------
    data : pandas.DataFrame
        A Person Query dataset in the form of a pandas dataframe. It must contain a
        `PersonId` column. The dataframe passed in is never modified.
    hrvar : str, optional
        The column name to group the data by. Defaults to `"Organization"`.
        If `None`, all rows are treated as a single group labelled `"Total"`.
    mingroup : int, optional
        The minimum number of employees required to include a group in the analysis. Defaults to `5`.
    metrics : list of str, optional
        A list of metric column names to calculate averages for. Names that are not
        columns of `data` are ignored. A single column name given as a string is
        treated as a one-element list. Defaults to:
        - `"Workweek_span"`
        - `"Collaboration_hours"`
        - `"After_hours_collaboration_hours"`
        - `"Meetings"`
        - `"Meeting_hours"`
        - `"After_hours_meeting_hours"`
        - `"Low_quality_meeting_hours"`
        - `"Meeting_hours_with_manager_1_on_1"`
        - `"Meeting_hours_with_manager"`
        - `"Emails_sent"`
        - `"Email_hours"`
        - `"After_hours_email_hours"`
        - `"Generated_workload_email_hours"`
        - `"Total_focus_hours"`
        - `"Internal_network_size"`
        - `"Networking_outside_organization"`
        - `"External_network_size"`
        - `"Networking_outside_company"`
    return_type : str, optional
        Specifies the type of output to return. Valid values are:
        - `"plot"` (default): Generate a heatmap visualization.
        - `"table"`: Return a summary table as a pandas DataFrame.
    low_color : str, optional
    mid_color : str, optional
    high_color : str, optional
        Color codes for low, mid, and high values in the heatmap. Defaults to:
        - `low_color="#4169E1"` (blue)
        - `mid_color="#F1CC9E"` (beige)
        - `high_color="#D8182A"` (red)
        Color codes for low, mid, and high values in the heatmap. Can be set to:
        - `Black: "#000000"`
        - `White: "#FFFFFF"`
        - `Red: "#FF0000"`
        - `Lime: "#00FF00"`
        - `Blue: "#0000FF"`
        - `Yellow: "#FFFF00"`
        - `Cyan/Aqua: "#00FFFF"`
        - `Magenta/Fuchsia: "#FF00FF"`
        - `Silver: "#C0C0C0"`
        - `Gray: "#808080"`
        - `Maroon: "#800000"`
        - `Olive: "#808000"`
        - `Green: "#008000"`
        - `Purple: "#800080"`
        - `Teal: "#008080"`
        - `Navy: "#000080"`
        - `Light Gray: "#D3D3D3"`
        - `Dark Gray: "#A9A9A9"`
        - `Dim Gray: "#696969"`
        - `Slate Gray: "#708090"`
        - `Light Slate Gray: "#778899"`
        - `Crimson: "#DC143C"`
        - `Coral: "#FF7F50"`
        - `Tomato: "#FF6347"`
        - `Orange: "#FFA500"`
        - `Gold: "#FFD700"`
        - `Dark Orange: "#FF8C00"`
        - `Light Salmon: "#FFA07A"`
        - `Dodger Blue: "#1E90FF"`
        - `Sky Blue: "#87CEEB"`
        - `Steel Blue: "#4682B4"`
        - `Light Blue: "#ADD8E6"`
        - `Dark Blue: "#00008B"`
        - `Medium Blue: "#0000CD"`
        - `Royal Blue: "#4169E1"`
        - `Sienna: "#A0522D"`
        - `Saddle Brown: "#8B4513"`
        - `Chocolate: "#D2691E"`
        - `Peru: "#CD853F"`
        - `Sandy Brown: "#F4A460"`
        - `Tan: "#D2B48C"`
        - `Lavender: "#E6E6FA"`
        - `Thistle: "#D8BFD8"`
        - `Plum: "#DDA0DD"`
        - `Orchid: "#DA70D6"`
        - `Peach Puff: "#FFDAB9"`
        - `Mint Cream: "#F5FFFA"`
        - `Forest Green: "#228B22"`
        - `Sea Green: "#2E8B57"`
        - `Medium Sea Green: "#3CB371"`
        - `Spring Green: "#00FF7F"`
        - `Pale Green: "#98FB98"`
        - `Indian Red: "#CD5C5C"`
        - `Rosy Brown: "#BC8F8F"`
        - `Hot Pink: "#FF69B4"`
        - `Deep Pink: "#FF1493"`
        - `Light Pink: "#FFB6C1"`
        - `Midnight Blue: "#191970"`
        - `Cornflower Blue: "#6495ED"`
        - `Powder Blue: "#B0E0E6"`
        - `Light Sky Blue: "#87CEFA"`
    textsize : int, optional
        Font size for the metric labels shown to the left of each heatmap row. Defaults to `10`.
    plot_row_scaling_factor : float, optional
        Figure height allotted to each heatmap row; the total figure height is
        this value multiplied by the number of rows. Defaults to `0.8`.

    Returns
    -------
    - If `return_type="plot"`: Returns a matplotlib Figure holding a heatmap of the row-wise rescaled key metrics (plus `Employee_Count`) grouped by the specified HR attribute.
    - If `return_type="table"`: Returns a pandas DataFrame containing a summary table of average metric values grouped by the specified HR attribute.

    Raises
    ------
    ValueError
        - If `return_type` is neither `"plot"` nor `"table"`.
        - If none of the specified metrics are present in the dataset.
        - If no data is available after applying the `mingroup` filter.

    Examples
    --------
    >>> import vivainsights as vi
    >>> pq_data = vi.load_pq_data()
    >>> vi.keymetrics_scan(data=pq_data, hrvar="Organization", mingroup=10, return_type="table")
    # Returns a summary table grouped by "Organization" with a minimum group size of 10.
    >>> vi.keymetrics_scan(data=pq_data, hrvar="Organization", metrics=["Workweek_span", "Meeting_hours"], return_type="plot")
    # Returns a heatmap of the rescaled "Workweek_span" and "Meeting_hours" metrics grouped by "Organization".
    >>> vi.keymetrics_scan(data=pq_data, low_color="#4169E1", mid_color="#F1CC9E", high_color="#D8182A", textsize=12)
    # Generates a heatmap using the low mid and high color palette with font size 12.
    """
    # Fail fast on an unsupported return type, before doing any aggregation.
    if return_type not in ("plot", "table"):
        raise ValueError("Invalid value for `return_type`. Choose either 'plot' or 'table'.")

    # Default group handling. `assign` returns a new frame, so the caller's
    # dataframe does not silently gain a 'Total' column.
    if hrvar is None:
        data = data.assign(Total='Total')
        hrvar = 'Total'

    # A bare string would otherwise be iterated character by character.
    if isinstance(metrics, str):
        metrics = [metrics]

    # Filter metrics present in the data (this rebinds the name; the default
    # list object itself is never mutated).
    metrics = [metric for metric in metrics if metric in data.columns]
    if not metrics:
        raise ValueError("None of the specified metrics are present in the dataset.")

    # Compute summary table: person-level averages first, then group-level
    # averages, so people with more rows do not weigh more than others.
    summary_table = (
        data.groupby([hrvar, 'PersonId'])[metrics]
        .mean()
        .groupby(hrvar)
        .mean()
        .reset_index()
    )

    # Add employee count
    employee_count = (
        data.groupby(hrvar)['PersonId']
        .nunique()
        .reset_index()
        .rename(columns={"PersonId": "Employee_Count"})
    )
    summary_table = summary_table.merge(employee_count, on=hrvar)
    summary_table = summary_table[summary_table['Employee_Count'] >= mingroup]
    if summary_table.empty:
        raise ValueError(
            "No data is available after applying the `mingroup` filter. "
            "Try a smaller `mingroup` or a different `hrvar`."
        )

    if return_type == "table":
        return summary_table

    # Melt the summary table for visualization: one record per (group, variable).
    summary_long = summary_table.melt(id_vars=[hrvar], var_name="variable", value_name="value")

    # Prepare the heatmap with row-wise normalization
    variables = summary_long['variable'].unique()
    num_vars = len(variables)

    cap_str = extract_date_range(data, return_type='text')
    title_text = "Key Metrics - Weekly Average"
    subtitle_text = f"By {hrvar.replace('_', ' ')}"

    # The colormap does not depend on the row, so build it once.
    custom_cmap = LinearSegmentedColormap.from_list("custom_cmap", [low_color, mid_color, high_color])

    # squeeze=False always yields a 2-D array of axes, so a single-row figure
    # needs no special casing.
    fig, axes = plt.subplots(num_vars, 1,
                             figsize=(10, plot_row_scaling_factor * num_vars),
                             sharex=True,
                             squeeze=False)

    for i, variable in enumerate(variables):
        ax = axes[i, 0]
        subset = summary_long[summary_long['variable'] == variable]
        values = subset['value']

        # Rescale each row to [0, 1] independently so metrics measured in
        # different units share one colour scale.
        row_min = values.min()
        row_max = values.max()
        value_range = row_max - row_min
        if value_range > 0:
            normalized_values = (values - row_min) / value_range
        else:
            # Constant row (e.g. a single group): 0/0 would produce NaN and
            # seaborn would leave the cells blank. Use the mid colour instead,
            # keeping genuinely missing values as NaN.
            normalized_values = values.where(values.isna(), 0.5)

        heatmap_data = pd.DataFrame([normalized_values.values], columns=subset[hrvar].values)

        # Colours come from the rescaled values; the annotations show the
        # original (unscaled) averages.
        sns.heatmap(heatmap_data,
                    annot=values.values.reshape(1, -1),
                    fmt=".1f",
                    cmap=custom_cmap,
                    cbar=False,
                    linewidths=0.5,
                    vmin=0,
                    vmax=1,
                    yticklabels=False,
                    ax=ax)

        if i == 0:
            # Only the first row carries the group labels, placed on top.
            ax.xaxis.tick_top()
            ax.tick_params(axis='x', labeltop=True, labelbottom=False, labelrotation=45, pad=10)
        else:
            ax.tick_params(axis='x', bottom=False, labelbottom=False)

        ax.set_ylabel(variable, fontsize=textsize, rotation=0, labelpad=5, ha="right")
        ax.tick_params(left=False)

    # Title, subtitle, accent line/box and date-range caption, all positioned
    # in figure coordinates.
    fig.text(0.01, 0.995, title_text, fontsize=16, weight='bold', ha='left', va='top')
    fig.text(0.01, 0.965, subtitle_text, fontsize=12, ha='left', va='top', alpha=0.85)
    line = Line2D([0.01, 1.0], [0.910, 0.910], transform=fig.transFigure,
                  color='#fe7f4f', linewidth=1.2, clip_on=False)
    fig.add_artist(line)
    rect = plt.Rectangle((0.01, 0.910), 0.03, -0.015,
                         transform=fig.transFigure,
                         facecolor='#fe7f4f',
                         clip_on=False,
                         linewidth=0)
    fig.add_artist(rect)
    fig.text(0.01, 0.01, cap_str, ha='left', fontsize=9, alpha=0.7)

    # Reserve room at the top for the header and at the bottom for the caption.
    plt.tight_layout(rect=[0, 0.03, 1, 0.93])
    return fig