Source code for vivainsights.create_bar

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
The code defines a function `create_bar` that calculates and visualizes the mean of a selected
metric, grouped by a selected HR variable. 

The metrics are first aggregated at a user-level prior to being aggregated at the level of the HR variable. The function `create_bar` returns either a plot object or a table, depending on the value passed to `return_type`. 
"""
import pandas as pd
import seaborn as sns
from vivainsights.extract_date_range import extract_date_range
from vivainsights.us_to_space import us_to_space
from vivainsights.totals_col import totals_col
import matplotlib.ticker as mtick
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator
import matplotlib
    
def create_bar_calc(
    data: pd.DataFrame,
    metric: str,
    hrvar: str,
    mingroup = 5,
    stats = False
    ):
    """Calculate the mean of a selected metric, grouped by a selected HR variable.

    The metric is first averaged per person within each HR group
    (``PersonId`` x ``hrvar``), and those person-level means are then averaged
    per HR group.

    Parameters
    ----------
    data : pd.DataFrame
        Input data. Must contain a ``PersonId`` column plus the columns named
        by `metric` and `hrvar`.
    metric : str
        Name of the column to average.
    hrvar : str
        Name of the column to group by.
    mingroup : int, optional
        Groups with fewer than this many distinct ``PersonId`` values are
        dropped from the output. Defaults to 5.
    stats : bool, optional
        When true, the columns ``sd``, ``median``, ``min`` and ``max``
        (computed over the person-level means) are appended. Defaults to False.

    Returns
    -------
    pd.DataFrame
        One row per retained HR group with the columns `hrvar`, ``metric``
        (group mean) and ``n`` (distinct persons), sorted by ``metric`` in
        descending order, plus the statistics columns when `stats` is true.
    """
    # Person-level average first, so that every person carries equal weight
    # in the group mean regardless of how many rows they contribute.
    person_level = (
        data.groupby(['PersonId', hrvar])[metric]
        .mean()
        .reset_index()
    )
    by_group = person_level.groupby(hrvar)

    output = by_group.agg(
        metric = (metric, 'mean'),
        n = ('PersonId', 'nunique')
    )
    output = output[output['n'] >= mingroup]
    output = output.rename_axis(hrvar).reset_index()
    output = output.sort_values(by = 'metric', ascending=False)

    if stats:
        stats_df = by_group.agg(
            sd = (metric, 'std'),
            median = (metric, 'median'),
            min = (metric, 'min'),
            max = (metric, 'max')
        )
        # Left join: keep only the groups that passed the `mingroup` filter and
        # keep the descending sort. An outer join would re-introduce the
        # filtered-out groups (exposing their statistics) and reorder the rows
        # by key.
        output = pd.merge(output, stats_df, on=hrvar, how='left')

    return output
def create_bar_viz(
    data: pd.DataFrame,
    metric: str,
    hrvar: str,
    mingroup = 5,
    percent: bool = False,
    plot_title = None,
    plot_subtitle = None):
    """Visualise the mean of a selected metric, grouped by a selected HR variable.

    Builds the summary table with `create_bar_calc` and draws it as a
    horizontal bar chart with a title, subtitle and date-range caption.

    Parameters
    ----------
    data : pd.DataFrame
        Input data, passed to `create_bar_calc` and `extract_date_range`.
    metric : str
        Name of the metric column to plot.
    hrvar : str
        Name of the column to group by; one bar is drawn per group.
    mingroup : int, optional
        Minimum group size, passed to `create_bar_calc`. Defaults to 5.
    percent : bool, optional
        When True, the x-axis and the bar labels are formatted as percentages
        (a value of 1.0 is shown as 100%). Defaults to False.
    plot_title : str, optional
        Title text. When None, `us_to_space(metric)` is used.
    plot_subtitle : str, optional
        Subtitle text. When None, ``'Weekly average by <hrvar>'`` is used.

    Returns
    -------
    matplotlib.figure.Figure
        The figure created for the chart.
    """
    summary = create_bar_calc(data, metric, hrvar, mingroup)
    caption = extract_date_range(data, return_type='text')

    heading = us_to_space(metric) if plot_title is None else plot_title
    # TODO: make the default subtitle dynamic by date interval
    subheading = (
        f'Weekly average by {hrvar}' if plot_subtitle is None else plot_subtitle
    )

    # Evaluated once; keeps the original `== True` comparison semantics.
    as_percent = percent == True  # noqa: E712

    n_bars = len(summary)
    accent = '#fe7f4f'
    left_edge = -.35

    fig, ax = plt.subplots(figsize=(4, 6))

    # Grid sits on layer 1 and the bars on layer 2, so the grid is drawn
    # behind the data.
    ax.grid(which="major", axis='x', color='#758D99', alpha=0.6, zorder=1)

    # Hide every spine except the left one, which is drawn slightly thicker.
    ax.spines[['top', 'right', 'bottom']].set_visible(False)
    ax.spines['left'].set_linewidth(1.1)

    ax.barh(summary[hrvar], summary['metric'], color='#1d627e', zorder=2)

    if as_percent:
        ax.xaxis.set_major_formatter(mtick.PercentFormatter(xmax=1.0))

    # Tighten the y-range to the number of bars actually drawn.
    ax.set_ylim(-0.5, n_bars - 0.5)

    # x tick labels go on top; none (and no tick marks) at the bottom.
    ax.xaxis.set_tick_params(
        labeltop=True,
        labelbottom=False,
        bottom=False,
        labelsize=9,
        pad=-1,
    )
    # Pad the y tick labels so they do not run over the axis.
    ax.yaxis.set_tick_params(pad=10, labelsize=9, bottom=False)

    ax.set_yticks(range(n_bars))
    ax.set_yticklabels(summary[hrvar], ha='right')

    # Accent line and tag above the title, positioned in figure coordinates.
    ax.plot(
        [left_edge, .87],
        [1.02, 1.02],
        transform=fig.transFigure,
        clip_on=False,
        color=accent,
        linewidth=.6,
    )
    tag = plt.Rectangle(
        (left_edge, 1.02),  # lower-left corner
        0.12,               # width
        -0.02,              # negative height so the tag extends downwards
        facecolor=accent,
        transform=fig.transFigure,
        clip_on=False,
        linewidth=0,
    )
    ax.add_patch(tag)

    # Title, subtitle and caption, in that order.
    text_rows = [
        (.96, heading, {'fontsize': 13, 'weight': 'bold', 'alpha': .8}),
        (.925, subheading, {'fontsize': 11, 'alpha': .8}),
        (.08, caption, {'fontsize': 9, 'alpha': .7}),
    ]
    for y_pos, content, style in text_rows:
        ax.text(
            x=left_edge,
            y=y_pos,
            s=content,
            transform=fig.transFigure,
            ha='left',
            **style,
        )

    # Value labels at the end of each bar.
    bars = ax.containers[0]
    if as_percent:
        pct_labels = [f"{100 * value:.0f}%" for value in summary['metric']]
        ax.bar_label(bars, labels=pct_labels, label_type="edge", padding=3)
    else:
        ax.bar_label(bars, fmt='%.0f', label_type='edge', padding=3)

    # Pad the spacing between the number and the edge of the figure.
    ax.margins(y=0.3)

    return fig
def create_bar(
    data: pd.DataFrame,
    metric: str,
    hrvar: str,
    mingroup: int = 5,
    percent: bool = False,
    return_type: str = "plot",
    plot_title = None,
    plot_subtitle = None):
    """Calculate or plot the mean of a metric, grouped by an HR variable.

    Thin dispatcher over `create_bar_viz()` (plot) and `create_bar_calc()`
    (table). The metric is aggregated per person first and then per HR group.

    Parameters
    ----------
    data : pd.DataFrame
        Person query data.
    metric : str
        Name of the metric to be analysed.
    hrvar : str
        Name of the organizational attribute to be used for grouping. When
        None is passed, `data` is run through `totals_col()` and the grouping
        column is set to ``"Total"``.
    mingroup : int, optional
        Minimum group size. Defaults to 5.
    percent : bool, optional
        Whether to display values as percentages. Only forwarded to the plot.
        Defaults to False.
    return_type : str, optional
        ``"plot"`` or ``"table"``. Defaults to ``"plot"``.
    plot_title : str, optional
        Title of the plot. Defaults to None.
    plot_subtitle : str, optional
        Subtitle of the plot. Defaults to None.

    Returns
    -------
    Various
        The result of `create_bar_viz()` for ``"plot"``, the result of
        `create_bar_calc()` for ``"table"``. For any other `return_type` the
        message string ``"Invalid input. Please check your inputs and try
        again."`` is returned (no exception is raised).

    Example
    -------
    >>> create_bar(pq_data, metric = "Collaboration_hours", hrvar = "LevelDesignation")
    """
    # No grouping variable supplied: fall back to the "Total" column.
    if hrvar is None:
        data = totals_col(data)
        hrvar = "Total"

    if return_type == "plot":
        return create_bar_viz(
            data=data,
            metric=metric,
            hrvar=hrvar,
            percent=percent,
            mingroup=mingroup,
            plot_title=plot_title,
            plot_subtitle=plot_subtitle,
        )

    if return_type == "table":
        return create_bar_calc(
            data=data,
            metric=metric,
            hrvar=hrvar,
            mingroup=mingroup,
        )

    return "Invalid input. Please check your inputs and try again."