# Source code for vivainsights.create_rank

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module performs a rank operation on all groups across HR attributes for a selected Viva Insights metric.
"""
import pandas as pd
from vivainsights.create_bar import create_bar_calc
from vivainsights.extract_date_range import extract_date_range
import matplotlib.pyplot as plt
from vivainsights.us_to_space import us_to_space

def create_rank_calc(data: pd.DataFrame, metric: str, hrvar=None, mingroup=5, stats=False):
    """
    Name
    ----
    create_rank_calc

    Description
    -----------
    Summarise `metric` for every group of every HR attribute in `hrvar` and
    return the groups ranked by the metric, highest first.

    Parameters
    ---------
    data : pandas dataframe
        person query data; passed unchanged to `create_bar_calc`
    metric : str
        name of the metric to be analysed
    hrvar : str or list of str, optional
        name(s) of the organizational attribute(s) to be used for grouping.
        Defaults to ['Organization', 'FunctionType'].
    mingroup : int, optional
        rows whose `n` value is below this threshold are dropped. Defaults to 5.
    stats : bool, optional
        if truthy, the 'sd', 'median', 'max' and 'min' columns returned by
        `create_bar_calc` are kept as well. Defaults to False.

    Returns
    -------
    A pandas dataframe with the columns 'hrvar', 'attributes', 'metric' and
    'n' (plus 'sd', 'median', 'max', 'min' when `stats` is truthy), sorted by
    'metric' in descending order.

    Raises
    ------
    ValueError
        if `hrvar` contains no attribute names.
    """
    if hrvar is None:
        hrvar = ['Organization', 'FunctionType']
    elif isinstance(hrvar, str):
        # A bare string would otherwise be iterated character by character.
        hrvar = [hrvar]
    else:
        hrvar = list(hrvar)

    if not hrvar:
        raise ValueError("`hrvar` must contain at least one attribute name.")

    # Normalise once so that any truthy/falsy value selects a branch.
    stats = bool(stats)

    keep_cols = ['hrvar', 'attributes', 'metric', 'n']
    if stats:
        keep_cols += ['sd', 'median', 'max', 'min']

    frames = []
    for attr in hrvar:
        # One summary table per HR attribute.
        ind_df = create_bar_calc(data=data, metric=metric, hrvar=attr, stats=stats)
        # Use a common column name so the per-attribute tables can be stacked.
        ind_df = ind_df.rename(columns={attr: 'attributes'})
        ind_df['hrvar'] = attr  # remember which attribute each row came from
        frames.append(ind_df[keep_cols])

    output = pd.concat(frames, axis=0)
    output = output[output['n'] >= mingroup]  # drop groups below the minimum size
    return output.sort_values(by='metric', ascending=False)
def create_rank_viz(data: pd.DataFrame, metric, hrvar=None, mingroup=5):
    """
    Name
    ----
    create_rank_viz

    Description
    -----------
    Draw a dumbbell-style chart showing, for each HR attribute in `hrvar`,
    the lowest and highest group value of `metric` joined by a horizontal
    line.

    Parameters
    ---------
    data : pandas dataframe
        person query data
    metric : str
        name of the metric to be analysed; also used (via `us_to_space`) as
        the chart title
    hrvar : str or list of str, optional
        name(s) of the organizational attribute(s) to be used for grouping.
        Defaults to ['Organization', 'FunctionType', 'LevelDesignation',
        'SupervisorIndicator'].
    mingroup : int, optional
        minimum group size forwarded to `create_rank_calc`. Defaults to 5.

    Returns
    -------
    The matplotlib figure object containing the chart.
    """
    if hrvar is None:
        hrvar = ['Organization', 'FunctionType', 'LevelDesignation', 'SupervisorIndicator']
    elif isinstance(hrvar, str):
        # A bare string would otherwise be iterated character by character.
        hrvar = [hrvar]
    else:
        hrvar = list(hrvar)

    cap_str = extract_date_range(data, return_type='text')

    col_highlight = '#fe7f4f'
    col_main = '#1d627e'
    col_min = '#DB444B'
    col_grid = '#758D99'

    # The ranked table does not depend on the loop variable, so compute it once.
    # It is sorted by metric descending, so per attribute the first row is the
    # maximum and the last row is the minimum.
    sum_df = create_rank_calc(data, metric, hrvar, mingroup)

    result_list = []
    for attr in hrvar:
        attr_rows = sum_df[sum_df['hrvar'] == attr]
        # `assign` returns new frames, avoiding writes onto filtered slices.
        result_list.append(attr_rows.head(1).assign(type='max'))
        result_list.append(attr_rows.tail(1).assign(type='min'))

    result = pd.concat(result_list, axis=0)

    # One row per attribute with attributes_max/min and metric_max/min columns.
    result_pivot = result.pivot(index='hrvar', columns='type', values=['attributes', 'metric'])
    result_pivot.columns = ["_".join(a) for a in result_pivot.columns.to_flat_index()]
    result_pivot = result_pivot.reset_index()

    # Pivoting text and numeric values together can leave the metric columns
    # as object dtype; make them numeric again before plotting.
    for metric_col in ('metric_min', 'metric_max'):
        result_pivot[metric_col] = pd.to_numeric(result_pivot[metric_col])

    y_pos = range(len(result_pivot))

    # Setup plot size.
    fig, ax = plt.subplots(figsize=(7, 4))

    # Create grid. zorder=1 keeps the grid behind the data (zorder 2 and 3).
    ax.grid(which="major", axis='both', color=col_grid, alpha=0.6, zorder=1)

    # Remove spines.
    ax.spines[['top', 'right', 'bottom']].set_visible(False)

    # Plot horizontal connecting lines first ...
    ax.hlines(
        y=y_pos,
        xmin=result_pivot['metric_min'],
        xmax=result_pivot['metric_max'],
        color=col_grid,
        zorder=2,
        linewidth=2,
        label='_nolegend_',
        alpha=.8
    )

    # ... then the bubbles on top of them.
    min_points = ax.scatter(result_pivot['metric_min'], y_pos, label='min', s=60, color=col_min, zorder=3)
    max_points = ax.scatter(result_pivot['metric_max'], y_pos, label='max', s=60, color=col_main, zorder=3)

    # Set xlim
    ax.set_xlim(0, 1.1 * result_pivot['metric_max'].max())

    # Reformat axis tick labels: x labels on top, attribute names on the y axis.
    ax.xaxis.set_tick_params(labeltop=True, labelbottom=False, bottom=False, labelsize=9, pad=-1)
    ax.yaxis.set_tick_params(pad=10, labelsize=9, bottom=False)
    ax.set_yticks(y_pos)
    ax.set_yticklabels(result_pivot['hrvar'], ha='right', fontsize=9)

    # Pass the handles explicitly so the legend does not depend on artist order.
    ax.legend(
        [min_points, max_points],
        ['min', 'max'],
        loc=(-.29, 1.09),
        ncol=2,
        frameon=False,
        handletextpad=-.1,
        handleheight=1
    )

    # Add in line and tag
    ax.plot([-0.08, .9], [1.17, 1.17], transform=fig.transFigure, clip_on=False, color=col_highlight, linewidth=.6)
    ax.add_patch(
        plt.Rectangle(
            (-0.08, 1.17),
            0.05,
            -0.025,
            facecolor=col_highlight,
            transform=fig.transFigure,
            clip_on=False,
            linewidth=0
        )
    )

    # Set title
    ax.text(x=-0.08, y=1.09, s=us_to_space(metric), transform=fig.transFigure, ha='left', fontsize=13, weight='bold', alpha=.8)

    # Set subtitle
    ax.text(x=-0.08, y=1.04, s='By organizational attributes', transform=fig.transFigure, ha='left', fontsize=11, alpha=.8)

    # Set caption
    ax.text(x=-0.08, y=0.04, s=cap_str, transform=fig.transFigure, ha='left', fontsize=9, alpha=.7)

    # return the plot object
    return fig
def create_rank(data: pd.DataFrame, metric: str, hrvar, mingroup=5, return_type: str = "plot"):
    """
    Name
    ----
    create_rank

    Description
    -----------
    This function performs a rank operation on all groups across HR attributes for a specified metric.

    Parameters
    ---------
    data : pandas dataframe
        person query data
    metric : str
        name of the metric to be analysed
    hrvar : str or list of str
        name(s) of the organizational attribute(s) to be used for grouping.
        A single string is wrapped into a one-element list.
    mingroup : int, optional
        minimum group size passed on to the calculation. Defaults to 5.
    return_type : str, optional
        type of output to return, either "plot" or "table". Defaults to "plot".

    Returns
    -------
    The output of `create_rank_viz` when `return_type` is "plot", the output
    of `create_rank_calc` when it is "table". For any other value an
    explanatory message string is returned instead.

    Example
    -------
    >>> import vivainsights as vi
    >>> pq_data = vi.load_pq_data()
    >>> vi.create_rank(data = pq_data, hrvar = "FunctionType", metric = "Emails_sent", return_type = "plot")
    """
    if isinstance(hrvar, str):
        hrvar = [hrvar]

    if return_type == "plot":
        return create_rank_viz(data=data, metric=metric, hrvar=hrvar, mingroup=mingroup)

    if return_type == "table":
        return create_rank_calc(data=data, metric=metric, hrvar=hrvar, mingroup=mingroup)

    # NOTE(review): a message is returned rather than an exception raised so
    # that existing callers relying on this behaviour keep working.
    return "Invalid input. Please check your inputs and try again."