# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
create_radar: Parameterized radar-chart workflow (calc + viz + wrapper),
in the same spirit as create_bar.
Core design
-----------
- General-purpose: works with any HR attribute column.
- Calculation pipeline:
* person-level aggregation within each group
* group-level aggregation
* minimum group size (`mingroup`)
* indexing modes: "total", "none", "ref_group", "minmax"
- Returns either a plot or a table.
Typical usage
-------------
>>> import vivainsights as vi
>>> from vivainsights.create_radar import create_radar
>>>
>>> pq_data = vi.load_pq_data()
>>> fig = create_radar(
... data=pq_data,
... metrics=[
... "Copilot_actions_taken_in_Teams",
... "Collaboration_hours",
... "After_hours_collaboration_hours",
... "Internal_network_size",
... ],
... hrvar="Organization",
... )
Return the indexed table instead of a plot:
>>> tbl = create_radar(
... data=pq_data,
... metrics=["Collaboration_hours", "Meetings_count"],
... hrvar="Organization",
... return_type="table",
... )
Reference a specific group as 100 (ref_group indexing):
>>> fig = create_radar(
... data=pq_data,
... metrics=["Collaboration_hours", "Meetings_count"],
... hrvar="Organization",
... index_mode="ref_group",
... index_ref_group="Contoso Ltd",
... )
Min-max scaling to [0,100] within observed group ranges:
>>> fig = create_radar(
... data=pq_data,
... metrics=["Collaboration_hours", "Meetings_count", "Focus_hours"],
... hrvar="Organization",
... index_mode="minmax",
... )
"""
from typing import List, Optional, Tuple, Literal, Union
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from vivainsights.extract_date_range import extract_date_range
from vivainsights.us_to_space import us_to_space
# Try vivainsights highlight color; fall back to hex
try:
from vivainsights.color_codes import Colors
_HIGHLIGHT = Colors.HIGHLIGHT_NEGATIVE.value
except Exception:
_HIGHLIGHT = "#fe7f4f"
# Header layout constants (aligned with other visuals)
_TITLE_Y = 0.955
_SUB_Y = 0.915
_RULE_Y = 0.900
_TOP_LIMIT = 0.80
__all__ = ["create_radar_calc", "create_radar_viz", "create_radar"]
# --------------------------------------------------------------------
# Figure-level header helpers
# --------------------------------------------------------------------
def _retitle_left(fig, title_text: Optional[str], subtitle_text: Optional[str] = None, left: float = 0.01) -> None:
"""Left-aligned figure-level title/subtitle; clear any axes titles/supertitle."""
for ax in fig.get_axes():
try:
ax.set_title("")
except Exception:
pass
if getattr(fig, "_suptitle", None) is not None:
fig._suptitle.set_visible(False)
if title_text:
fig.text(left, _TITLE_Y, title_text, ha="left", fontsize=13, weight="bold", alpha=.8)
if subtitle_text:
fig.text(left, _SUB_Y, subtitle_text, ha="left", fontsize=11, alpha=.8)
def _add_header_decoration(fig, color: str = _HIGHLIGHT, y: float = _RULE_Y) -> None:
"""Colored rule + small box under the subtitle."""
overlay = fig.add_axes([0, 0, 1, 1], frameon=False, zorder=10)
overlay.set_axis_off()
overlay.add_line(
Line2D([0.01, 1.0], [y, y], transform=overlay.transAxes, color=color, linewidth=1.2)
)
overlay.add_patch(
plt.Rectangle(
(0.01, y),
0.03,
-0.015,
transform=overlay.transAxes,
facecolor=color,
linewidth=0,
)
)
def _reserve_header_space(fig, top: float = _TOP_LIMIT) -> None:
"""Push axes down so the header never overlaps."""
try:
if hasattr(fig, "get_constrained_layout") and fig.get_constrained_layout():
fig.set_constrained_layout(False)
except Exception:
pass
fig.subplots_adjust(top=top)
# --------------------------------------------------------------------
# 1) CALC
# --------------------------------------------------------------------
IndexMode = Literal["total", "none", "ref_group", "minmax"]
[docs]
def create_radar_calc(
data: pd.DataFrame,
metrics: List[str],
hrvar: str,
id_col: str = "PersonId",
mingroup: int = 5,
agg: Literal["mean", "median"] = "mean",
index_mode: IndexMode = "total",
index_ref_group: Optional[str] = None,
dropna: bool = True,
) -> Tuple[pd.DataFrame, pd.Series]:
"""
Name
----
create_radar_calc
Description
-----------
Compute group-level metric values and (optionally) index them for radar plotting.
Steps:
1. Aggregate to person-level within each group (mean/median).
2. Aggregate the person-level values to the group level.
3. Enforce a minimum person count per group (`mingroup`).
4. Apply an indexing mode to make metrics comparable.
Parameters
----------
data : pd.DataFrame
Standard Person Query data frame containing `metrics`, `hrvar`, and `id_col`.
metrics : List[str]
Numeric metric column names to summarise and index for the radar chart.
hrvar : str
HR attribute column identifying the group for each person
(e.g., "Organization", "LevelDesignation").
id_col : str, default "PersonId"
Column uniquely identifying people for person-level aggregation.
mingroup : int, default 5
Minimum number of unique people required in a group to retain it.
agg : {"mean","median"}, default "mean"
Aggregation function for both person-level and group-level summaries.
index_mode : {"total","none","ref_group","minmax"}, default "total"
- "total": index each metric vs. the overall person-level average (Total = 100).
- "ref_group": index vs. a specific group given by `index_ref_group` (Ref = 100).
- "minmax": scale to [0,100] within the min-max of observed group values (per metric).
- "none": return raw (unindexed) group values.
index_ref_group : Optional[str], default None
Required when `index_mode="ref_group"`. Name of the group to serve as reference (=100).
dropna : bool, default True
If True, drop rows with NA in any of `[id_col, hrvar] + metrics` prior to aggregation.
Returns
-------
(group_level_indexed, ref) : Tuple[pd.DataFrame, pd.Series]
group_level_indexed
One row per group, wide across `metrics`. Values are indexed/scaled as per
`index_mode`.
ref
The reference used for indexing:
- For "total" / "ref_group": a pd.Series of reference means/medians.
- For "minmax": a two-column DataFrame with per-metric min and max.
- For "none": empty Series.
"""
if not metrics:
raise ValueError("`metrics` must be a non-empty list of column names.")
required_cols = [id_col, hrvar] + metrics
missing = [c for c in required_cols if c not in data.columns]
if missing:
raise KeyError(f"Missing required column(s): {missing}")
df = data[required_cols].copy()
if dropna:
df = df.dropna(subset=required_cols)
# Person-level aggregation within group
if agg == "mean":
person_level = (
df.groupby([id_col, hrvar])[metrics]
.mean()
.reset_index()
)
elif agg == "median":
person_level = (
df.groupby([id_col, hrvar])[metrics]
.median()
.reset_index()
)
else:
raise ValueError("`agg` must be 'mean' or 'median'.")
# Group-level aggregation across people
if agg == "mean":
group_level = (
person_level.groupby(hrvar)[metrics]
.mean()
.reset_index()
)
else:
group_level = (
person_level.groupby(hrvar)[metrics]
.median()
.reset_index()
)
# Enforce mingroup (by unique people per group)
counts = (
person_level.groupby(hrvar)[id_col]
.nunique()
.rename("n")
)
group_level = group_level.merge(counts, on=hrvar, how="left")
group_level = group_level[group_level["n"] >= mingroup].copy()
group_level.drop(columns=["n"], inplace=True)
if group_level.empty:
ref = pd.Series(dtype=float)
return group_level, ref
# Compute reference for indexing
if index_mode == "total":
ref = (
person_level[metrics].mean()
if agg == "mean"
else person_level[metrics].median()
)
elif index_mode == "ref_group":
if index_ref_group is None:
raise ValueError("index_ref_group must be provided when index_mode='ref_group'.")
ref_row = group_level.loc[group_level[hrvar] == index_ref_group]
if ref_row.empty:
raise ValueError(f"Reference group '{index_ref_group}' not found in {hrvar}.")
ref = ref_row[metrics].iloc[0]
elif index_mode == "minmax":
mins = group_level[metrics].min()
maxs = group_level[metrics].max()
ref = pd.concat({"min": mins, "max": maxs}, axis=1)
elif index_mode == "none":
ref = pd.Series(dtype=float)
else:
raise ValueError("index_mode must be one of: 'total', 'none', 'ref_group', 'minmax'.")
# Indexing
group_level_indexed = group_level.copy()
if index_mode in ("total", "ref_group"):
for m in metrics:
denom = ref[m] if (hasattr(ref, "__getitem__") and m in ref) else np.nan
if pd.isna(denom) or denom == 0:
warnings.warn(
f"Reference value for metric '{m}' is {denom}; "
"indexed values set to 100 (neutral baseline) for this metric.",
RuntimeWarning,
stacklevel=3,
)
group_level_indexed[m] = 100.0
else:
group_level_indexed[m] = (group_level_indexed[m] / denom) * 100.0
elif index_mode == "minmax":
mins = ref["min"]
maxs = ref["max"]
for m in metrics:
den = (maxs[m] - mins[m])
group_level_indexed[m] = 100.0 * (
group_level_indexed[m] - mins[m]
) / (den if den != 0 else 1.0)
# else: "none" -> leave raw values
return group_level_indexed, ref
# --------------------------------------------------------------------
# 2) VIZ
# --------------------------------------------------------------------
[docs]
def create_radar_viz(
data: pd.DataFrame,
metrics: List[str],
hrvar: str,
fill_missing: str = "zero",
figsize: Tuple[float, float] = (8, 6),
title: Optional[str] = None,
subtitle: Optional[str] = None,
caption: Optional[str] = None,
) -> plt.Figure:
"""
Name
----
create_radar_viz
Description
-----------
Render a radar (spider) chart from a wide, group-level table produced by
`create_radar_calc`. Each row in `data` is plotted as a polygon across the
supplied `metrics` in the given order.
Parameters
----------
data : pd.DataFrame
One row per group, columns include `hrvar` and each of `metrics`.
Values should already be indexed/scaled to comparable units (i.e. the
output of `create_radar_calc`).
metrics : List[str]
Ordered list of metric columns to plot around the radar.
hrvar : str
Column containing the group labels used in the legend.
fill_missing : str, default "zero"
How to handle NA values in `data` before plotting:
- "zero": replace NA with 0 so polygons close correctly.
- "none": leave NA as-is (polygon may not render for that group).
figsize : Tuple[float, float], default (8, 6)
Matplotlib figure size in inches (width, height).
title : Optional[str], default None
Top title for the figure.
subtitle : Optional[str], default None
Optional smaller line beneath the title (figure-level, not axes).
caption : Optional[str], default None
Small text near the bottom of the figure (e.g., date range).
Returns
-------
fig : matplotlib.figure.Figure
The constructed matplotlib Figure.
"""
if data.empty:
raise ValueError("`data` is empty - nothing to plot.")
if fill_missing not in ("zero", "none"):
raise ValueError(f"`fill_missing` must be 'zero' or 'none', got {fill_missing!r}.")
num_vars = len(metrics)
if num_vars == 0:
raise ValueError("`metrics` must be a non-empty list.")
# Angles
angles = [n / float(num_vars) * 2 * np.pi for n in range(num_vars)]
angles += angles[:1]
fig, ax = plt.subplots(figsize=figsize, subplot_kw=dict(polar=True))
groups = list(data[hrvar].astype(str).unique())
# Plot each group
for grp in groups:
row = data.loc[data[hrvar].astype(str) == grp]
if row.empty:
continue
vals = row[metrics].iloc[0].to_list()
if fill_missing == "zero":
vals = [0.0 if pd.isna(v) else float(v) for v in vals]
else: # "none" — keep missing as NaN so the polygon renders honestly
vals = [np.nan if pd.isna(v) else float(v) for v in vals]
vals += vals[:1]
ax.plot(angles, vals, label=grp, linewidth=1.5)
ax.fill(angles, vals, alpha=0.10)
# Formatting
ax.set_theta_offset(np.pi / 2)
ax.set_theta_direction(-1)
axis_labels = [us_to_space(m) for m in metrics]
ax.set_thetagrids([a * 180 / np.pi for a in angles[:-1]], axis_labels)
# Bottom caption (left-aligned, matching the title anchor)
if caption:
fig.text(0.01, 0.01, caption, ha="left", va="center", fontsize=9)
# Legend (outside on the right) — only when labeled artists exist
if ax.get_legend_handles_labels()[0]:
ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))
# Let Matplotlib tighten elements first...
plt.tight_layout()
# Figure-level header styling
_retitle_left(fig, title, subtitle, left=0.01)
_add_header_decoration(fig) # rule + box
_reserve_header_space(fig) # push axes down to avoid overlap
return fig
# --------------------------------------------------------------------
# 3) WRAPPER
# --------------------------------------------------------------------
ReturnType = Literal["plot", "table"]
[docs]
def create_radar(
data: pd.DataFrame,
metrics: List[str],
hrvar: str = "Organization",
id_col: str = "PersonId",
mingroup: int = 5,
agg: Literal["mean", "median"] = "mean",
index_mode: IndexMode = "total",
index_ref_group: Optional[str] = None,
dropna: bool = False,
return_type: ReturnType = "plot",
figsize: Tuple[float, float] = (8, 6),
title: Optional[str] = None,
subtitle: Optional[str] = None,
caption: Optional[str] = None,
) -> Union[plt.Figure, pd.DataFrame]:
"""
Name
----
create_radar
Description
-----------
High-level convenience wrapper to compute group-level metrics and either:
(a) return the indexed table (return_type="table"), or
(b) render a radar chart (return_type="plot").
Parameters
----------
data : pd.DataFrame
Standard Person Query data frame containing at least `metrics`, `id_col`,
and `hrvar`.
metrics : List[str]
Numeric metric columns to visualise (order determines the radar axes).
hrvar : str, default "Organization"
HR attribute column used for grouping (e.g., "Organization", "LevelDesignation").
id_col : str, default "PersonId"
Unique person identifier for person-level aggregation.
mingroup : int, default 5
Minimum unique person count per group.
agg : {"mean","median"}, default "mean"
Aggregation function for person- and group-level summaries.
index_mode : {"total","none","ref_group","minmax"}, default "total"
Indexing/scaling mode applied to group values prior to plotting.
index_ref_group : Optional[str], default None
Required when `index_mode="ref_group"`. The name of the group that will be
fixed at 100.
dropna : bool, default False
Drop rows with NA in required columns prior to aggregation.
return_type : {"plot","table"}, default "plot"
- "plot": return a matplotlib Figure.
- "table": return the indexed group-level DataFrame.
figsize : Tuple[float, float], default (8, 6)
Figure size for the plot (ignored when return_type="table").
title : Optional[str], default None
Plot title. If None, a default title is inferred based on `index_mode`.
subtitle : Optional[str], default None
Optional subtitle line.
caption : Optional[str], default None
Additional caption text appended after the auto-generated date range and
index label, e.g. "caption" → "Data from … | Index: … | caption".
If None, only the date range and index label are shown.
Returns
-------
matplotlib.figure.Figure or pd.DataFrame
- If `return_type="plot"`: a Figure containing the radar chart.
- If `return_type="table"`: the group-level indexed DataFrame.
"""
if return_type not in ("plot", "table"):
raise ValueError(f"return_type must be 'plot' or 'table', got {return_type!r}.")
df = data.copy()
if hrvar not in df.columns:
raise KeyError(f"hrvar '{hrvar}' not found in data.")
# Index method label (mirrors R: index_label in create_radar)
_index_labels = {
"total": "Index: population average = 100",
"minmax": "Index: min-max scaled [0, 100]",
"none": "Raw values (no indexing)",
}
if index_mode == "ref_group":
ref_name = index_ref_group or ""
index_label = f"Index: {ref_name} = 100"
else:
index_label = _index_labels.get(index_mode, "")
# Build caption: "<date range> | <index label>" (always auto-generated)
auto_caption = ""
try:
auto_caption = extract_date_range(df, return_type="text")
except Exception:
pass
auto_caption = f"{auto_caption} | {index_label}" if auto_caption else index_label
# Append any user-supplied extra text
caption_final = f"{auto_caption} | {caption}" if caption else auto_caption
# Compute group-level table
table, _ = create_radar_calc(
data=df,
metrics=metrics,
hrvar=hrvar,
id_col=id_col,
mingroup=mingroup,
agg=agg,
index_mode=index_mode,
index_ref_group=index_ref_group,
dropna=dropna,
)
if return_type == "table":
return table
# Default title/subtitle
if title is None:
base_title = "Behavioral Profiles by Group"
if index_mode in ("total", "ref_group"):
base_title += " (Indexed)"
elif index_mode == "minmax":
base_title += " (Min-Max Scaled)"
else:
base_title = title
if subtitle is None:
subtitle_effective = f"Radar view across metrics by {hrvar}"
else:
subtitle_effective = subtitle
fig = create_radar_viz(
data=table,
metrics=metrics,
hrvar=hrvar,
figsize=figsize,
title=base_title,
subtitle=subtitle_effective,
caption=caption_final,
)
return fig