Source code for vivainsights.hrvar_count

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module generates a count of the distinct persons in the data population.
Returns a bar plot of the counts by default, with an option to return a summary table.
"""
import pandas as pd
import matplotlib.pyplot as plt
from vivainsights.extract_date_range import extract_date_range

def hrvar_count_calc(data: pd.DataFrame, hrvar: str):
    """Count the distinct persons in each group of a selected HR variable.

    Parameters
    ----------
    data : pandas dataframe
        Data containing a `PersonId` column and the column named by `hrvar`.
    hrvar : str
        Name of the column to group by.

    Returns
    -------
    pandas dataframe
        One row per `hrvar` group, with the number of distinct `PersonId`
        values in column `n`, ordered from the largest count to the smallest.
    """
    person_counts = (
        data.groupby([hrvar])['PersonId']
        .nunique()
        .reset_index(name='n')
    )
    return person_counts.sort_values(by='n', ascending=False)
def hrvar_count_viz(data: pd.DataFrame, hrvar: str):
    """Visualise the number of distinct persons in the data population, grouped by a selected HR variable.

    Parameters
    ----------
    data : pandas dataframe
        Passed to `hrvar_count_calc` to obtain the counts and to
        `extract_date_range` to obtain the caption text.
    hrvar : str
        Name of the column to group by; also shown in the plot title.

    Returns
    -------
    matplotlib figure
        The figure holding the horizontal bar chart.
    """
    sum_df = hrvar_count_calc(data=data, hrvar=hrvar)
    cap_str = extract_date_range(data, return_type='text')

    fig, ax = plt.subplots(figsize=(4, 6))

    # Create grid.
    # zorder picks the drawing layer: grid on 1, bars on 2, so the grid sits behind the data.
    ax.grid(which="major", axis='x', color='#758D99', alpha=0.6, zorder=1)

    # Hide every spine except the left one, and make that one slightly thicker.
    ax.spines[['top', 'right', 'bottom']].set_visible(False)
    ax.spines['left'].set_linewidth(1.1)

    # Create bar plot.
    ax.barh(sum_df[hrvar], sum_df['n'], color='#1d627e', zorder=2)

    # Tighten the y-limits; derived from the table length so it adapts to the number of groups.
    ax.set_ylim(-0.5, len(sum_df) - 0.5)

    # Reformat x-axis tick labels.
    ax.xaxis.set_tick_params(
        labeltop=True,      # Put x-axis labels on top
        labelbottom=False,  # Set no x-axis labels on bottom
        bottom=False,       # Set no ticks on bottom
        labelsize=9,        # Set tick label size
        pad=-1              # Lower tick labels a bit
    )

    # Reformat y-axis tick labels.
    ax.yaxis.set_tick_params(
        pad=10,             # Pad tick labels so they don't go over y-axis
        labelsize=9,        # Set label size
        bottom=False        # Set no ticks on bottom/left
    )
    ax.set_yticks(range(len(sum_df)))
    ax.set_yticklabels(
        sum_df[hrvar],      # Set labels again
        ha='right'          # Set horizontal alignment to right
    )

    # Add the accent line and tag; both are positioned in figure coordinates.
    ax.plot(
        [-.35, .87],                 # Set width of line
        [1.02, 1.02],                # Set height of line
        transform=fig.transFigure,   # Set location relative to plot
        clip_on=False,
        color='#fe7f4f',
        linewidth=.6
    )
    ax.add_patch(
        plt.Rectangle(
            (-.35, 1.02),            # Lower-left corner of the rectangle
            0.12,                    # Width of rectangle
            -0.02,                   # Height of rectangle. Negative so it goes down.
            facecolor='#fe7f4f',
            transform=fig.transFigure,
            clip_on=False,
            linewidth=0
        )
    )

    # Add in title and caption.
    ax.text(x=-.35, y=.96, s=f'People by {hrvar}', transform=fig.transFigure,
            ha='left', fontsize=13, weight='bold', alpha=.8)
    ax.text(x=-.35, y=.08, s=cap_str, transform=fig.transFigure,
            ha='left', fontsize=9, alpha=.7)

    # Annotate each bar with its count. Called on `ax` directly rather than through
    # pyplot so the labels always land on the axes created above, whatever the
    # current pyplot axes happens to be.
    ax.bar_label(ax.containers[0], fmt='%.0f', label_type='edge', padding=3)

    # Pad the spacing between the number and the edge of the figure.
    # NOTE(review): the y-limits are set explicitly above, so this y margin probably
    # has no visible effect; an x margin may have been intended — confirm.
    ax.margins(y=0.3)

    # return the plot object
    return fig
def hrvar_count(data: pd.DataFrame, hrvar: str = 'Organization', return_type: str = "plot"):
    """
    Name
    ----
    hrvar_count

    Description
    -----------
    Count the distinct persons in the data population for each value of a
    selected HR variable, returned either as a bar plot or as a summary table.

    Parameters
    ---------
    data : pandas dataframe
        person query data
    hrvar : str
        name of the organizational attribute to be used for grouping.
        Defaults to "Organization".
    return_type : str, optional
        type of output to return: "plot" (the default) for the output of
        `hrvar_count_viz`, or "table" for the output of `hrvar_count_calc`.

    Returns
    -------
    The plot or the table selected by `return_type`. Any other
    `return_type` value yields an error message string instead of raising.

    Example
    -------
    >>> hrvar_count(pq_data, hrvar = "LevelDesignation")
    """
    # Guard-clause dispatch: each recognised return_type returns immediately.
    if return_type == "plot":
        return hrvar_count_viz(data=data, hrvar=hrvar)
    if return_type == "table":
        return hrvar_count_calc(data=data, hrvar=hrvar)

    # Unrecognised return_type: hand back the message string.
    return "Invalid input. Please check your inputs and try again."