Source code for vivainsights.create_line

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module visualizes the average of a selected metric by sub-population over time.
By default it returns a line plot of the metric average for each group.
A summary table can be returned instead by passing `return_type="table"`.
"""
import pandas as pd
import seaborn as sns
import numpy as np
from vivainsights.extract_date_range import extract_date_range
from vivainsights.color_codes import *
from vivainsights.totals_col import totals_col
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import warnings

# Ignore warnings for cleaner output.
# NOTE(review): this runs at import time and installs an "ignore" filter for
# every warning category, so it also silences warnings raised by other code in
# the importing process -- confirm that this is intended.
warnings.filterwarnings("ignore")


def create_line_calc(data: pd.DataFrame, metric: str, hrvar: str, mingroup = 5):
    """
    Name
    ----
    create_line_calc

    Description
    -----------
    Summarises a metric by date and by group, dropping groups that fall below
    the minimum group size.

    Parameters
    ----------
    data : pandas dataframe
        Input data. Must contain the columns `MetricDate` and `PersonId`, plus
        the columns named by `metric` and `hrvar`.
    metric : str
        Name of the column to average.
    hrvar : str
        Name of the column used for grouping.
    mingroup : int, optional
        Minimum number of distinct `PersonId` values a date/group combination
        needs in order to be kept, by default 5.

    Returns
    -------
    pandas dataframe
        One row per retained `MetricDate` / `hrvar` combination with the
        columns `MetricDate`, `hrvar`, `metric` (the mean; the column is
        literally named "metric") and `n` (distinct `PersonId` count).
    """
    grouped = data.groupby(['MetricDate', hrvar])

    # Named aggregation: the output column names are the dictionary keys
    summary = grouped.agg(**{
        'metric': pd.NamedAgg(column=metric, aggfunc='mean'),
        'n': pd.NamedAgg(column='PersonId', aggfunc='nunique'),
    })

    # Privacy threshold: keep only combinations with enough distinct people
    meets_threshold = summary['n'] >= mingroup

    return summary.loc[meets_threshold].reset_index()
def _create_line_single_axes(sum_df, hrvar, clean_nm, sub_str, cap_str):
    """Draw every group as a coloured line on one shared axes and return the figure."""
    # Setup plot size.
    fig, ax = plt.subplots(figsize=(7, 4))

    sns.lineplot(
        data=sum_df,
        x='MetricDate',
        y='metric',
        hue=hrvar,
        ax=ax,
        # One palette entry per group that is actually plotted
        palette=COLOR_PALLET_ALT_2[0:sum_df[hrvar].nunique()],
    )

    # Remove spines. Can be done one at a time or can slice with a list.
    ax.spines[['top', 'right', 'left']].set_visible(False)

    # Anchor the y-axis at zero; the upper limit stays automatic
    ax.set_ylim(0)

    # Reformat x-axis tick labels
    ax.xaxis.set_tick_params(labelsize=9, rotation=45)  # Set tick label size and rotation
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %y'))
    ax.set_xlabel('')  # Remove x-axis label

    # Reformat y-axis tick labels.
    # Ticks are derived from the data range rather than fixed at 0-20, so
    # metrics of any magnitude get a sensible number of "nice" tick values.
    ax.yaxis.set_major_locator(plt.MaxNLocator(nbins=5, steps=[1, 2, 5, 10]))
    # ':g' prints whole numbers without decimals and keeps fractional ticks
    # distinguishable for small-valued metrics.
    ax.yaxis.set_major_formatter(plt.FuncFormatter(lambda x, _: f'{x:g}'))
    ax.yaxis.set_tick_params(
        pad=-2,            # Pull tick labels in towards the plot area
        labelright=True,   # Show y tick labels on the right-hand side
        labelleft=False,   # Hide y tick labels on the left-hand side
        left=False,        # Hide tick marks on the left-hand side
        labelsize=11,      # Set tick label size
    )

    # Set title
    ax.text(x=0.12, y=.91, s=clean_nm, transform=fig.transFigure,
            ha='left', fontsize=13, weight='bold', alpha=.8)

    # Set subtitle
    ax.text(x=0.12, y=.86, s=sub_str, transform=fig.transFigure,
            ha='left', fontsize=11, alpha=.8)

    # Set source text
    ax.text(x=0.12, y=-0.08, s=cap_str, transform=fig.transFigure,
            ha='left', fontsize=9, alpha=.7)

    return fig


def _create_line_facets(sum_df, hrvar, clean_nm, sub_str, cap_str):
    """Draw one small line chart per group in a two-column grid and return the FacetGrid."""
    facet_grid_plot = sns.FacetGrid(
        data=sum_df,
        hue=hrvar,
        col=hrvar,
        col_wrap=2,
        height=4,
        aspect=1,
    )
    facet_grid_plot.map(sns.lineplot, "MetricDate", "metric")

    # To add space between the title and subplots
    facet_grid_plot.figure.tight_layout(rect=[0, 0, 1, 0.96])

    # Set title
    facet_grid_plot.figure.text(x=0.07, y=1, s=clean_nm,
                                ha='left', fontsize=13, weight='bold', alpha=.8)

    # Set subtitle
    facet_grid_plot.figure.text(x=0.07, y=.98, s=sub_str,
                                ha='left', fontsize=11, alpha=.8)

    # Set source text
    facet_grid_plot.figure.text(x=0.1, y=-0.08, s=cap_str,
                                ha='left', fontsize=9, alpha=.7)

    for ax in facet_grid_plot.axes.flat:
        # Setting labels
        ax.set_ylabel(clean_nm)
        ax.set_xlabel('')
        # Rotate the date labels for readability. tick_params only changes the
        # rotation; re-assigning the label text with set_xticklabels would
        # freeze the labels at their current values.
        ax.tick_params(axis='x', labelrotation=45)

    return facet_grid_plot


def create_line_viz(data: pd.DataFrame, metric: str, hrvar: str, mingroup = 5):
    """
    Name
    ----
    create_line_viz

    Description
    -----------
    Plots the average of a metric over time, split by group. The summary is
    computed with `create_line_calc`. When `data[hrvar]` has four or fewer
    distinct values all groups are drawn as coloured lines on a single axes;
    otherwise each group gets its own panel in a two-column facet grid.

    Parameters
    ----------
    data : pandas dataframe
        Input data; passed unchanged to `create_line_calc`.
    metric : str
        Name of the metric column. Underscores are replaced with spaces for
        the plot title and y-axis label.
    hrvar : str
        Name of the column used for grouping.
    mingroup : int, optional
        Minimum group size passed to `create_line_calc`, by default 5.

    Returns
    -------
    matplotlib Figure or seaborn FacetGrid
        A Figure for the single-axes layout, a FacetGrid for the faceted layout.
    """
    # Summarised output
    sum_df = create_line_calc(data, metric, hrvar, mingroup)
    sum_df['MetricDate'] = pd.to_datetime(sum_df['MetricDate'], format='%Y-%m-%d')

    # Clean labels for plotting
    clean_nm = metric.replace("_", " ")
    cap_str = extract_date_range(sum_df, return_type = 'text')
    sub_str = f'By {hrvar}'

    # The layout is chosen from the number of distinct values in the input
    # data, i.e. before the minimum group size filter is applied.
    if len(data[hrvar].unique()) <= 4:
        return _create_line_single_axes(sum_df, hrvar, clean_nm, sub_str, cap_str)

    return _create_line_facets(sum_df, hrvar, clean_nm, sub_str, cap_str)
def create_line(data: pd.DataFrame, metric: str, hrvar: str, mingroup = 5, return_type: str = 'plot'):
    """
    Name
    ----
    create_line

    Description
    -----------
    Shows how the average of a selected metric changes over time for each
    group, either as line charts or as the underlying summary table.

    Parameters
    ----------
    data : pandas dataframe
        person query data
    metric : str
        name of the metric to be analysed
    hrvar : str
        name of the organizational attribute to be used for grouping.
        If None is passed, the data is run through `totals_col` and the
        column "Total" is used for grouping instead.
    mingroup : int, optional
        Numeric value setting the privacy threshold / minimum group size, by default 5
    return_type : str, optional
        type of output to return: "plot" for the output of `create_line_viz`,
        "table" for the output of `create_line_calc`. Defaults to "plot".

    Returns
    -------
    Various
        A plot or a table, depending on `return_type`. Any other value of
        `return_type` yields the string
        "Invalid input. Please check your inputs and try again.".

    Example
    -------
    >>> import vivainsights as vi
    >>> pq_data = vi.load_pq_data()
    >>> vi.create_line(pq_data, metric = "Collaboration_hours", hrvar = "LevelDesignation")
    """
    # Handling None value passed to hrvar
    if hrvar is None:
        data = totals_col(data = data)
        hrvar = "Total"

    if return_type == "plot":
        return create_line_viz(data=data, metric=metric, hrvar=hrvar, mingroup=mingroup)

    if return_type == "table":
        return create_line_calc(data=data, metric=metric, hrvar=hrvar, mingroup=mingroup)

    return "Invalid input. Please check your inputs and try again."