Source code for vivainsights.create_radar

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------

"""
create_radar: Parameterized radar-chart workflow (calc + viz + wrapper),
in the same spirit as create_bar.

Core design
-----------
- General-purpose: works with any HR attribute column.
- Calculation pipeline:
    * person-level aggregation within each group
    * group-level aggregation
    * minimum group size (`mingroup`)
    * indexing modes: "total", "none", "ref_group", "minmax"
- Returns either a plot or a table.

Typical usage
-------------

>>> import vivainsights as vi
>>> from vivainsights.create_radar import create_radar
>>>
>>> pq_data = vi.load_pq_data()
>>> fig = create_radar(
...     data=pq_data,
...     metrics=[
...         "Copilot_actions_taken_in_Teams",
...         "Collaboration_hours",
...         "After_hours_collaboration_hours",
...         "Internal_network_size",
...     ],
...     hrvar="Organization",
... )

Return the indexed table instead of a plot:

>>> tbl = create_radar(
...     data=pq_data,
...     metrics=["Collaboration_hours", "Meetings_count"],
...     hrvar="Organization",
...     return_type="table",
... )

Reference a specific group as 100 (ref_group indexing):

>>> fig = create_radar(
...     data=pq_data,
...     metrics=["Collaboration_hours", "Meetings_count"],
...     hrvar="Organization",
...     index_mode="ref_group",
...     index_ref_group="Contoso Ltd",
... )

Min-max scaling to [0,100] within observed group ranges:

>>> fig = create_radar(
...     data=pq_data,
...     metrics=["Collaboration_hours", "Meetings_count", "Focus_hours"],
...     hrvar="Organization",
...     index_mode="minmax",
... )
"""

from typing import List, Optional, Tuple, Literal, Union
import warnings

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from vivainsights.extract_date_range import extract_date_range
from vivainsights.us_to_space import us_to_space

# Try vivainsights highlight color; fall back to hex
# The broad `except Exception` is a deliberate best-effort: any failure while
# importing `Colors` or reading `HIGHLIGHT_NEGATIVE.value` selects the literal
# hex fallback instead of breaking module import.
try:
    from vivainsights.color_codes import Colors
    _HIGHLIGHT = Colors.HIGHLIGHT_NEGATIVE.value
except Exception:
    _HIGHLIGHT = "#fe7f4f"

# Header layout constants (aligned with other visuals)
# `_TITLE_Y` / `_SUB_Y` are the y positions handed to `fig.text` for the title
# and subtitle; `_RULE_Y` is the y position of the decorative rule drawn on a
# full-figure overlay axes; `_TOP_LIMIT` is the `top` value passed to
# `fig.subplots_adjust` so the plot area stays below the header.
_TITLE_Y = 0.955
_SUB_Y = 0.915
_RULE_Y = 0.900
_TOP_LIMIT = 0.80

# Public API of this module; the underscore-prefixed helpers are internal.
__all__ = ["create_radar_calc", "create_radar_viz", "create_radar"]


# --------------------------------------------------------------------
# Figure-level header helpers
# --------------------------------------------------------------------
def _retitle_left(fig, title_text: Optional[str], subtitle_text: Optional[str] = None, left: float = 0.01) -> None:
    """
    Replace axes titles / suptitle with a left-aligned figure-level header.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to annotate.
    title_text : Optional[str]
        Bold title line; nothing is drawn when falsy.
    subtitle_text : Optional[str], default None
        Smaller line under the title; nothing is drawn when falsy.
    left : float, default 0.01
        x position passed to `fig.text` for both lines.
    """
    # Blank every axes title; failures on individual axes are ignored.
    for axes in fig.get_axes():
        try:
            axes.set_title("")
        except Exception:
            pass

    # Hide (rather than remove) an existing suptitle, if the figure has one.
    suptitle = getattr(fig, "_suptitle", None)
    if suptitle is not None:
        suptitle.set_visible(False)

    # (text, y position, line-specific text properties)
    header_lines = (
        (title_text, _TITLE_Y, dict(fontsize=13, weight="bold")),
        (subtitle_text, _SUB_Y, dict(fontsize=11)),
    )
    for text, y_pos, style in header_lines:
        if text:
            fig.text(left, y_pos, text, ha="left", alpha=.8, **style)


def _add_header_decoration(fig, color: str = _HIGHLIGHT, y: float = _RULE_Y) -> None:
    """
    Draw the header decoration: a horizontal rule plus a small filled box.

    Both artists are placed on a new frameless, axis-free overlay axes that
    spans the whole figure (zorder 10).

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to decorate.
    color : str, default `_HIGHLIGHT`
        Colour used for both the rule and the box.
    y : float, default `_RULE_Y`
        Vertical position of the rule in the overlay's axes coordinates.
    """
    canvas = fig.add_axes([0, 0, 1, 1], frameon=False, zorder=10)
    canvas.set_axis_off()
    to_axes = canvas.transAxes

    # Horizontal rule from x=0.01 to the right edge.
    rule = Line2D([0.01, 1.0], [y, y], transform=to_axes, color=color, linewidth=1.2)
    canvas.add_line(rule)

    # Negative height: the box hangs below the rule from its left end.
    box = plt.Rectangle(
        (0.01, y),
        0.03,
        -0.015,
        transform=to_axes,
        facecolor=color,
        linewidth=0,
    )
    canvas.add_patch(box)


def _reserve_header_space(fig, top: float = _TOP_LIMIT) -> None:
    """
    Lower the top of the subplot area so the figure header has room.

    Constrained layout, when active, is switched off first (best-effort; any
    error while querying or disabling it is ignored), then
    `fig.subplots_adjust(top=top)` is applied.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to adjust.
    top : float, default `_TOP_LIMIT`
        Value forwarded to `fig.subplots_adjust(top=...)`.
    """
    try:
        constrained = hasattr(fig, "get_constrained_layout") and fig.get_constrained_layout()
        if constrained:
            fig.set_constrained_layout(False)
    except Exception:
        pass

    fig.subplots_adjust(top=top)


# --------------------------------------------------------------------
# 1) CALC
# --------------------------------------------------------------------
# Accepted values for the `index_mode` parameter of `create_radar_calc` and
# `create_radar`.
IndexMode = Literal["total", "none", "ref_group", "minmax"]



[docs]
def create_radar_calc(
    data: pd.DataFrame,
    metrics: List[str],
    hrvar: str,
    id_col: str = "PersonId",
    mingroup: int = 5,
    agg: Literal["mean", "median"] = "mean",
    index_mode: IndexMode = "total",
    index_ref_group: Optional[str] = None,
    dropna: bool = True,
) -> Tuple[pd.DataFrame, Union[pd.Series, pd.DataFrame]]:
    """
    Name
    ----
    create_radar_calc

    Description
    -----------
    Compute group-level metric values and (optionally) index them for radar plotting.

    Steps:
      1. Aggregate to person-level within each group (mean/median).
      2. Aggregate the person-level values to the group level.
      3. Enforce a minimum person count per group (`mingroup`).
      4. Apply an indexing mode to make metrics comparable.

    Parameters
    ----------
    data : pd.DataFrame
        Standard Person Query data frame containing `metrics`, `hrvar`, and `id_col`.
    metrics : List[str]
        Numeric metric column names to summarise and index for the radar chart.
    hrvar : str
        HR attribute column identifying the group for each person
        (e.g., "Organization", "LevelDesignation").
    id_col : str, default "PersonId"
        Column uniquely identifying people for person-level aggregation.
    mingroup : int, default 5
        Minimum number of unique people required in a group to retain it.
    agg : {"mean","median"}, default "mean"
        Aggregation function for both person-level and group-level summaries.
    index_mode : {"total","none","ref_group","minmax"}, default "total"
        - "total": index each metric vs. the overall person-level average (Total = 100).
          The reference is computed over all person-level rows, including people
          whose group is later removed by `mingroup`.
        - "ref_group": index vs. a specific group given by `index_ref_group` (Ref = 100).
        - "minmax": scale to [0,100] within the min-max of observed group values (per metric).
          A metric whose retained groups all share one value is scaled to 0.
        - "none": return raw (unindexed) group values.
    index_ref_group : Optional[str], default None
        Required when `index_mode="ref_group"`. Name of the group to serve as reference (=100).
        The group must survive the `mingroup` filter.
    dropna : bool, default True
        If True, drop rows with NA in any of `[id_col, hrvar] + metrics` prior to aggregation.

    Returns
    -------
    (group_level_indexed, ref) : Tuple[pd.DataFrame, pd.Series or pd.DataFrame]
        group_level_indexed
            One row per group, wide across `metrics`. Values are indexed/scaled as per
            `index_mode`.
        ref
            The reference used for indexing:
            - For "total" / "ref_group": a pd.Series of reference means/medians.
            - For "minmax": a two-column DataFrame with per-metric min and max.
            - For "none", or when no group meets `mingroup`: empty Series.

    Raises
    ------
    ValueError
        If `metrics` is empty, `agg` or `index_mode` is not a supported value,
        `index_ref_group` is missing for `index_mode="ref_group"`, or the
        reference group is not among the retained groups.
    KeyError
        If any of `[id_col, hrvar] + metrics` is not a column of `data`.

    Warns
    -----
    RuntimeWarning
        In "total" / "ref_group" mode, when the reference value of a metric is
        zero or NA; that metric is then set to 100 for every group.
    """
    if not metrics:
        raise ValueError("`metrics` must be a non-empty list of column names.")

    required_cols = [id_col, hrvar] + metrics
    missing = [c for c in required_cols if c not in data.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Validate options before doing any work, so a bad argument is reported
    # even when no group survives the `mingroup` filter (early return below).
    if agg not in ("mean", "median"):
        raise ValueError("`agg` must be 'mean' or 'median'.")
    if index_mode not in ("total", "none", "ref_group", "minmax"):
        raise ValueError("index_mode must be one of: 'total', 'none', 'ref_group', 'minmax'.")
    if index_mode == "ref_group" and index_ref_group is None:
        raise ValueError("index_ref_group must be provided when index_mode='ref_group'.")

    df = data[required_cols].copy()

    if dropna:
        df = df.dropna(subset=required_cols)

    # Person-level aggregation within group, then group-level across people.
    person_level = df.groupby([id_col, hrvar])[metrics].agg(agg).reset_index()
    by_group = person_level.groupby(hrvar)
    group_level = by_group[metrics].agg(agg).reset_index()

    # Enforce mingroup (by unique people per group). Filtering via `isin`
    # avoids adding a helper column that could clash with a metric name.
    people_per_group = by_group[id_col].nunique()
    eligible_groups = people_per_group[people_per_group >= mingroup].index
    group_level = group_level[group_level[hrvar].isin(eligible_groups)].copy()

    if group_level.empty:
        return group_level, pd.Series(dtype=float)

    # Compute reference for indexing
    if index_mode == "total":
        ref = person_level[metrics].agg(agg)
    elif index_mode == "ref_group":
        ref_row = group_level.loc[group_level[hrvar] == index_ref_group]
        if ref_row.empty:
            raise ValueError(f"Reference group '{index_ref_group}' not found in {hrvar}.")
        ref = ref_row[metrics].iloc[0]
    elif index_mode == "minmax":
        ref = pd.concat(
            {"min": group_level[metrics].min(), "max": group_level[metrics].max()},
            axis=1,
        )
    else:  # "none"
        ref = pd.Series(dtype=float)

    # Indexing
    group_level_indexed = group_level.copy()
    if index_mode in ("total", "ref_group"):
        for m in metrics:
            denom = ref.get(m, np.nan)
            if pd.isna(denom) or denom == 0:
                # Dividing by a zero/NA reference is meaningless; fall back to
                # the neutral baseline instead of emitting inf/NaN.
                warnings.warn(
                    f"Reference value for metric '{m}' is {denom}; "
                    "indexed values set to 100 (neutral baseline) for this metric.",
                    RuntimeWarning,
                    stacklevel=3,
                )
                group_level_indexed[m] = 100.0
            else:
                group_level_indexed[m] = (group_level_indexed[m] / denom) * 100.0
    elif index_mode == "minmax":
        mins = ref["min"]
        maxs = ref["max"]
        for m in metrics:
            span = maxs[m] - mins[m]
            # A zero span (all groups equal) would divide by zero; use 1 so
            # the metric scales to 0 for every group.
            group_level_indexed[m] = 100.0 * (
                group_level_indexed[m] - mins[m]
            ) / (span if span != 0 else 1.0)
    # else: "none" -> leave raw values

    return group_level_indexed, ref



# --------------------------------------------------------------------
# 2) VIZ
# --------------------------------------------------------------------

[docs]
def create_radar_viz(
    data: pd.DataFrame,
    metrics: List[str],
    hrvar: str,
    fill_missing: str = "zero",
    figsize: Tuple[float, float] = (8, 6),
    title: Optional[str] = None,
    subtitle: Optional[str] = None,
    caption: Optional[str] = None,
) -> plt.Figure:
    """
    Name
    ----
    create_radar_viz

    Description
    -----------
    Render a radar (spider) chart from a wide, group-level table produced by
    `create_radar_calc`. Each group in `data` is plotted as a polygon across the
    supplied `metrics` in the given order. Group labels are compared as strings;
    if a label occurs on several rows, only its first row is plotted.

    Parameters
    ----------
    data : pd.DataFrame
        One row per group, columns include `hrvar` and each of `metrics`.
        Values should already be indexed/scaled to comparable units (i.e. the
        output of `create_radar_calc`).
    metrics : List[str]
        Ordered list of metric columns to plot around the radar.
    hrvar : str
        Column containing the group labels used in the legend.
    fill_missing : str, default "zero"
        How to handle NA values in `data` before plotting:
        - "zero": replace NA with 0 so polygons close correctly.
        - "none": leave NA as-is (polygon may not render for that group).
    figsize : Tuple[float, float], default (8, 6)
        Matplotlib figure size in inches (width, height).
    title : Optional[str], default None
        Top title for the figure.
    subtitle : Optional[str], default None
        Optional smaller line beneath the title (figure-level, not axes).
    caption : Optional[str], default None
        Small text near the bottom of the figure (e.g., date range).

    Returns
    -------
    fig : matplotlib.figure.Figure
        The constructed matplotlib Figure.

    Raises
    ------
    ValueError
        If `data` is empty, `metrics` is empty, or `fill_missing` is not
        "zero" or "none".
    """
    if data.empty:
        raise ValueError("`data` is empty - nothing to plot.")

    if fill_missing not in ("zero", "none"):
        raise ValueError(f"`fill_missing` must be 'zero' or 'none', got {fill_missing!r}.")

    num_vars = len(metrics)
    if num_vars == 0:
        raise ValueError("`metrics` must be a non-empty list.")

    # One spoke per metric; repeat the first angle to close each polygon.
    angles = [n / float(num_vars) * 2 * np.pi for n in range(num_vars)]
    angles += angles[:1]

    fig, ax = plt.subplots(figsize=figsize, subplot_kw=dict(polar=True))

    # Cast labels once and keep the first row of every distinct label, in
    # order of first appearance.
    group_labels = data[hrvar].astype(str)
    is_first = (~group_labels.duplicated()).to_numpy()
    missing_value = 0.0 if fill_missing == "zero" else np.nan

    # Plot each group
    for grp, raw_vals in zip(
        group_labels[is_first],
        data.loc[is_first, metrics].itertuples(index=False, name=None),
    ):
        vals = [missing_value if pd.isna(v) else float(v) for v in raw_vals]
        vals.append(vals[0])

        ax.plot(angles, vals, label=grp, linewidth=1.5)
        ax.fill(angles, vals, alpha=0.10)

    # Formatting: first metric at the top, proceeding clockwise.
    ax.set_theta_offset(np.pi / 2)
    ax.set_theta_direction(-1)
    axis_labels = [us_to_space(m) for m in metrics]
    ax.set_thetagrids([a * 180 / np.pi for a in angles[:-1]], axis_labels)

    # Bottom caption (left-aligned, matching the title anchor)
    if caption:
        fig.text(0.01, 0.01, caption, ha="left", va="center", fontsize=9)

    # Legend (outside on the right) — only when labeled artists exist
    if ax.get_legend_handles_labels()[0]:
        ax.legend(loc="upper right", bbox_to_anchor=(1.3, 1.1))

    # Tighten this figure explicitly (not whichever figure pyplot considers
    # current) before the header is added.
    fig.tight_layout()

    # Figure-level header styling
    _retitle_left(fig, title, subtitle, left=0.01)
    _add_header_decoration(fig)   # rule + box
    _reserve_header_space(fig)    # push axes down to avoid overlap

    return fig



# --------------------------------------------------------------------
# 3) WRAPPER
# --------------------------------------------------------------------
# Accepted values for the `return_type` parameter of `create_radar`.
ReturnType = Literal["plot", "table"]



[docs]
def create_radar(
    data: pd.DataFrame,
    metrics: List[str],
    hrvar: str = "Organization",
    id_col: str = "PersonId",
    mingroup: int = 5,
    agg: Literal["mean", "median"] = "mean",
    index_mode: IndexMode = "total",
    index_ref_group: Optional[str] = None,
    dropna: bool = False,
    return_type: ReturnType = "plot",
    figsize: Tuple[float, float] = (8, 6),
    title: Optional[str] = None,
    subtitle: Optional[str] = None,
    caption: Optional[str] = None,
) -> Union[plt.Figure, pd.DataFrame]:
    """
    Name
    ----
    create_radar

    Description
    -----------
    Wrapper that runs `create_radar_calc` and then either hands back the
    resulting group-level table (return_type="table") or passes it to
    `create_radar_viz` to draw a radar chart (return_type="plot").

    Parameters
    ----------
    data : pd.DataFrame
        Standard Person Query data frame containing at least `metrics`, `id_col`,
        and `hrvar`.
    metrics : List[str]
        Numeric metric columns to visualise (order determines the radar axes).
    hrvar : str, default "Organization"
        HR attribute column used for grouping.
    id_col : str, default "PersonId"
        Unique person identifier for person-level aggregation.
    mingroup : int, default 5
        Minimum unique person count per group.
    agg : {"mean","median"}, default "mean"
        Aggregation function for person- and group-level summaries.
    index_mode : {"total","none","ref_group","minmax"}, default "total"
        Indexing/scaling mode applied to group values prior to plotting.
    index_ref_group : Optional[str], default None
        Required when `index_mode="ref_group"`; the group fixed at 100.
    dropna : bool, default False
        Drop rows with NA in required columns prior to aggregation.
    return_type : {"plot","table"}, default "plot"
        - "plot": return a matplotlib Figure.
        - "table": return the indexed group-level DataFrame.
    figsize : Tuple[float, float], default (8, 6)
        Figure size for the plot (ignored when return_type="table").
    title : Optional[str], default None
        Plot title. If None, a default title is chosen from `index_mode`.
    subtitle : Optional[str], default None
        Subtitle line. If None, a default mentioning `hrvar` is used.
    caption : Optional[str], default None
        Extra text appended, after " | ", to the auto-generated caption
        (date range when available, followed by the index label).

    Returns
    -------
    matplotlib.figure.Figure or pd.DataFrame
        The radar chart for `return_type="plot"`, otherwise the group-level
        indexed DataFrame.

    Raises
    ------
    ValueError
        If `return_type` is not "plot" or "table".
    KeyError
        If `hrvar` is not a column of `data`.
    """
    if return_type not in ("plot", "table"):
        raise ValueError(f"return_type must be 'plot' or 'table', got {return_type!r}.")

    df = data.copy()

    if hrvar not in df.columns:
        raise KeyError(f"hrvar '{hrvar}' not found in data.")

    # Human-readable description of the indexing method for the caption.
    fixed_labels = {
        "total": "Index: population average = 100",
        "minmax": "Index: min-max scaled [0, 100]",
        "none": "Raw values (no indexing)",
    }
    index_label = (
        f"Index: {index_ref_group or ''} = 100"
        if index_mode == "ref_group"
        else fixed_labels.get(index_mode, "")
    )

    # Date range is best-effort: if it cannot be derived, the caption simply
    # starts with the index label.
    try:
        date_text = extract_date_range(df, return_type="text")
    except Exception:
        date_text = ""

    if date_text:
        caption_final = f"{date_text} | {index_label}"
    else:
        caption_final = index_label
    if caption:
        caption_final = f"{caption_final} | {caption}"

    # Group-level table; the reference returned alongside it is not needed here.
    table, _ = create_radar_calc(
        data=df,
        metrics=metrics,
        hrvar=hrvar,
        id_col=id_col,
        mingroup=mingroup,
        agg=agg,
        index_mode=index_mode,
        index_ref_group=index_ref_group,
        dropna=dropna,
    )

    if return_type == "table":
        return table

    # Default title carries a suffix describing the scaling, when there is one.
    if title is not None:
        base_title = title
    else:
        title_suffix = {
            "total": " (Indexed)",
            "ref_group": " (Indexed)",
            "minmax": " (Min-Max Scaled)",
        }.get(index_mode, "")
        base_title = "Behavioral Profiles by Group" + title_suffix

    subtitle_effective = (
        subtitle if subtitle is not None else f"Radar view across metrics by {hrvar}"
    )

    return create_radar_viz(
        data=table,
        metrics=metrics,
        hrvar=hrvar,
        figsize=figsize,
        title=base_title,
        subtitle=subtitle_effective,
        caption=caption_final,
    )