# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module performs a rank operation on all groups across HR attributes for a selected Viva Insights metric.
"""
import pandas as pd
from vivainsights.create_bar import create_bar_calc
from vivainsights.extract_date_range import extract_date_range
import matplotlib.pyplot as plt
from vivainsights.us_to_space import us_to_space
def create_rank_calc(data: pd.DataFrame,
                     metric: str,
                     hrvar = ['Organization', 'FunctionType'],
                     mingroup = 5,
                     stats = False):
    """
    Rank all groups across one or more HR attributes for a metric.

    For each attribute in `hrvar`, `create_bar_calc` is called to summarise
    `metric` by that attribute. The per-attribute summaries are stacked into
    one table, rows whose 'n' is below `mingroup` are dropped, and the result
    is sorted by 'metric' in descending order.

    Parameters
    ----------
    data : pandas dataframe
        Data passed through unchanged to `create_bar_calc`.
    metric : str
        Name of the metric to be analysed.
    hrvar : str or list of str
        Name(s) of the organizational attribute(s) to group by. A single
        string is treated as a one-element list.
    mingroup : int
        Minimum value of 'n' a group needs in order to be kept.
        Defaults to 5.
    stats : bool
        When truthy, the columns 'sd', 'median', 'max' and 'min' are
        requested from `create_bar_calc` and kept in the output.

    Returns
    -------
    pandas dataframe
        Columns 'hrvar', 'attributes', 'metric' and 'n' (plus the statistics
        columns when `stats` is truthy), sorted by 'metric' descending.

    Raises
    ------
    ValueError
        If `hrvar` is empty.
    """
    # A bare string would otherwise be iterated character by character.
    hrvar_list = [hrvar] if isinstance(hrvar, str) else list(hrvar)
    if not hrvar_list:
        raise ValueError("hrvar must contain at least one attribute name.")

    # Collapse to a real boolean so values such as None or 1 take a defined path.
    want_stats = bool(stats)

    keep_cols = ['hrvar', 'attributes', 'metric', 'n']
    if want_stats:
        keep_cols += ['sd', 'median', 'max', 'min']

    output_list = []
    for var in hrvar_list:
        # One summary frame per HR attribute.
        ind_df = create_bar_calc(data=data, metric=metric, hrvar=var, stats=want_stats)
        # Use a common column name so frames for different attributes can be stacked.
        ind_df = ind_df.rename(columns={var: 'attributes'})
        ind_df['hrvar'] = var
        output_list.append(ind_df[keep_cols])

    output = pd.concat(output_list, axis=0)
    output = output[output['n'] >= mingroup]
    return output.sort_values(by='metric', ascending=False)
def create_rank_viz(data: pd.DataFrame,
                    metric: str,
                    hrvar = ['Organization', 'FunctionType', 'LevelDesignation', 'SupervisorIndicator'],
                    mingroup = 5):
    """
    Plot the highest and lowest ranked group of each HR attribute for a metric.

    The groups are ranked with `create_rank_calc`. For every attribute in
    `hrvar` the first and last row of that ranking are kept and drawn as two
    dots joined by a horizontal line, one row per attribute.

    Parameters
    ----------
    data : pandas dataframe
        Data passed to `create_rank_calc` and `extract_date_range`.
    metric : str
        Name of the metric to be analysed; also used for the plot title.
    hrvar : str or list of str
        Name(s) of the organizational attribute(s) to group by. A single
        string is treated as a one-element list.
    mingroup : int
        Minimum group size forwarded to `create_rank_calc`. Defaults to 5.

    Returns
    -------
    matplotlib figure
        The figure holding the plot.

    Raises
    ------
    ValueError
        If no group remains to be plotted.
    """
    # A bare string would otherwise be iterated character by character.
    hrvar_list = [hrvar] if isinstance(hrvar, str) else list(hrvar)

    cap_str = extract_date_range(data, return_type = 'text')
    col_highlight = '#fe7f4f'
    col_main = '#1d627e'
    col_grid = '#758D99'

    # The ranking does not depend on the loop variable, so compute it once.
    # It is sorted by 'metric' descending, hence head = max and tail = min.
    sum_df = create_rank_calc(data, metric, hrvar_list, mingroup)

    result_list = []
    for var in hrvar_list:
        var_df = sum_df[sum_df['hrvar'] == var]
        # assign() returns new frames, avoiding chained assignment on a slice.
        result_list.append(var_df.head(1).assign(type='max'))
        result_list.append(var_df.tail(1).assign(type='min'))

    result = pd.concat(result_list, axis=0) if result_list else pd.DataFrame()
    if result.empty:
        raise ValueError("No groups left to plot. Please check hrvar and mingroup.")

    # One row per attribute with columns attributes_max/min and metric_max/min.
    result_pivot = result.pivot(index='hrvar', columns='type', values=['attributes','metric'])
    result_pivot.columns = ["_".join(a) for a in result_pivot.columns.to_flat_index()]
    result_pivot = result_pivot.reset_index()

    # Shared y positions for the lines, the dots and the tick labels.
    y_pos = list(range(len(result_pivot)))

    # Setup plot size.
    fig, ax = plt.subplots(figsize=(7,4))

    # Create grid
    # Zorder tells it which layer to put it on. We are setting this to 1 and our data to 2 so the grid is behind the data.
    ax.grid(which="major", axis='both', color=col_grid, alpha=0.6, zorder=1)

    # Remove spines.
    for side in ('top', 'right', 'bottom'):
        ax.spines[side].set_visible(False)

    # Plot data
    # Plot horizontal lines first; '_nolegend_' keeps them out of the legend.
    ax.hlines(y=y_pos, xmin=result_pivot['metric_min'], xmax=result_pivot['metric_max'],
              color=col_grid, zorder=2, linewidth=2, label='_nolegend_', alpha=.8)
    # Plot bubbles next
    ax.scatter(result_pivot['metric_min'], y_pos, label='min', s=60, color='#DB444B', zorder=3)
    ax.scatter(result_pivot['metric_max'], y_pos, label='max', s=60, color=col_main, zorder=3)

    # Set xlim
    ax.set_xlim(0, 1.1*result_pivot['metric_max'].max())

    # Reformat x-axis tick labels
    ax.xaxis.set_tick_params(labeltop=True, labelbottom=False, bottom=False, labelsize=9, pad=-1)

    # Reformat y-axis tick labels; positions are fixed first so labels and ticks line up.
    ax.yaxis.set_tick_params(pad=10, labelsize=9, bottom=False)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(result_pivot['hrvar'], ha='right', fontsize=9)

    # Set legend (entries come from the scatter labels above).
    ax.legend(loc=(-.29, 1.09))

    # Add in line and tag
    ax.plot([-0.08, .9], [1.17, 1.17], transform=fig.transFigure, clip_on=False, color=col_highlight, linewidth=.6)
    ax.add_patch(plt.Rectangle((-0.08, 1.17),
                               0.05, -0.025, facecolor=col_highlight, transform=fig.transFigure, clip_on=False, linewidth=0))

    # Set title
    ax.text(x=-0.08, y=1.09, s=us_to_space(metric), transform=fig.transFigure, ha='left', fontsize=13, weight='bold', alpha=.8)

    # Set subtitle
    ax.text(x=-0.08, y=1.04, s='By organizational attributes', transform=fig.transFigure, ha='left', fontsize=11, alpha=.8)

    # Set caption
    ax.text(x=-0.08, y=0.04, s=cap_str, transform=fig.transFigure, ha='left', fontsize=9, alpha=.7)

    # return the plot object
    return fig
def create_rank(data: pd.DataFrame, metric: str, hrvar: str, mingroup = 5, return_type: str = "plot"):
    """
    This function performs a rank operation on all groups across HR attributes for a specified metric.

    Parameters
    ----------
    data : pandas dataframe
        person query data
    metric : str
        name of the metric to be analysed
    hrvar : str or list of str
        name(s) of the organizational attribute(s) to be used for grouping
    mingroup : int
        minimum group size, forwarded to `create_rank_viz` / `create_rank_calc`.
        Defaults to 5.
    return_type : str or optional
        type of output to return, either "plot" or "table". Defaults to "plot".

    Returns
    -------
    A plot or a table depending on the return_type argument. Any other
    return_type yields the string
    "Invalid input. Please check your inputs and try again."

    Examples
    --------
    >>> import vivainsights as vi
    >>> pq_data = vi.load_pq_data()
    >>> create_rank(data = pq_data, hrvar = "FunctionType", metric = "Emails_sent", return_type = "plot")
    """
    # The helpers iterate over hrvar, so a single name is wrapped in a list.
    if isinstance(hrvar, str):
        hrvar = [hrvar]

    if return_type == "plot":
        out = create_rank_viz(data=data, metric=metric, hrvar=hrvar, mingroup=mingroup)
    elif return_type == "table":
        out = create_rank_calc(data=data, metric=metric, hrvar=hrvar, mingroup=mingroup)
    else:
        # Only reached for an unknown return_type; must not overwrite a valid result.
        out = "Invalid input. Please check your inputs and try again."

    return out