Source code for vivainsights.create_trend

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
The `create_trend` function provides a week by week view of a selected Viva Insights metric, 
allowing you to either return a week by week heatmap bar plot or a summary table. 
By default, `create_trend` returns a week by week heatmap bar plot, highlighting the points in time with the most activity.
Additional options are available to return a summary table instead.
"""

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from vivainsights.extract_date_range import extract_date_range
from vivainsights import totals_col

def create_trend(data: pd.DataFrame,
                 metric: str,
                 palette = None,
                 hrvar: str = "Organization",
                 mingroup = 5,
                 return_type: str = "plot",
                 legend_title: str = "Hours",
                 date_column: str = "MetricDate",
                 date_format: str = "%Y-%m-%d"):
    """
    Name
    ----
    create_trend

    Description
    -----------
    Provide a week by week view of a selected Viva Insights metric.
    By default returns a week by week heatmap, highlighting the points in time
    with the most activity. A summary table can be returned instead.

    Parameters
    ---------
    data : pandas dataframe
        The input data. Must contain `date_column`, `metric` and `PersonId`
        columns, plus the `hrvar` column.
    metric : str
        Name of the numeric column in `data` to average per group and date.
    palette : list
        List of colours used to build the heatmap colour map. When omitted
        (`None`), the default 8-colour palette is used.
    hrvar : str
        Name of the column used to group the data. Defaults to "Organization".
        If `None`, `totals_col(data)` is applied and the grouping column
        "Total" is used instead.
    mingroup : int
        Minimum group size. Groups with fewer than `mingroup` unique
        `PersonId` values are dropped from the output. Defaults to 5.
    return_type : str
        Either "plot" (default), which returns the heatmap, or "table", which
        returns the summary table.
    legend_title : str
        Label shown on the colour bar of the plot. Defaults to "Hours".
    date_column : str
        Name of the column in `data` holding the dates. Defaults to
        "MetricDate".
    date_format : str
        Format of the dates in `date_column`, given as `datetime` format codes
        and passed to `pandas.to_datetime`. Defaults to "%Y-%m-%d".

    Returns
    ------
    When `return_type` is "table": a pandas DataFrame with one row per group,
    one column per date, and the mean of `metric` as values.
    When `return_type` is "plot": the object returned by `create_trend_viz`.

    Raises
    ------
    ValueError
        If `return_type` is neither "table" nor "plot".

    Example
    -------
    >>> import vivainsights as vi
    >>> pq_data = vi.load_pq_data()
    >>> vi.create_trend(data = pq_data, metric = "Collaboration_hours", hrvar = "LevelDesignation")
    """
    # Reject an unknown return type before doing any work
    if return_type not in ("table", "plot"):
        raise ValueError("Please enter a valid input for return_type.")

    # A fresh list per call avoids sharing one mutable default between calls.
    # It must stay a list: seaborn only treats a *list* of colours as a colour map.
    if palette is None:
        palette = [
            "#0c3c44",
            "#1d627e",
            "#34b1e2",
            "#bfe5ee",
            "#fcf0eb",
            "#fbdacd",
            "#facebc",
            "#fe7f4f",
        ]

    # Without a grouping variable, fall back to a single "Total" group
    if hrvar is None:
        data = totals_col(data)
        hrvar = "Total"

    if return_type == "table":
        myTable = create_trend_calc(data, metric, hrvar, mingroup, date_column, date_format)
        # Wide layout: groups as rows, dates as columns
        return myTable.pivot(index="group", columns=date_column, values=metric)

    return create_trend_viz(data, metric, palette, hrvar, mingroup, legend_title, date_column, date_format)
def create_trend_calc(data, metric, hrvar, mingroup, date_column, date_format):
    """
    Name
    ----
    create_trend_calc

    Description
    -----------
    Compute the mean of a metric per date and per group, keeping only groups
    that contain at least `mingroup` unique `PersonId` values.
    The input DataFrame is not modified.

    Parameters
    ---------
    data : pandas dataframe
        Input data containing `date_column`, `metric`, `PersonId` and `hrvar`.
    metric : str
        Name of the column to average.
    hrvar : str
        Name of the grouping column; it is renamed to "group" in the output.
    mingroup : int
        Minimum number of unique `PersonId` values a group needs to be kept.
    date_column : str
        Name of the date column.
    date_format : str
        Format passed to `pandas.to_datetime` to parse `date_column`.

    Returns
    ------
    A pandas DataFrame with the columns `date_column`, "group",
    "Employee_Count" and `metric`, one row per date and group.

    Raises
    ------
    ValueError
        If `date_column`, `metric` or "PersonId" is missing from `data`.
    KeyError
        If `hrvar` is not a column of `data` (no "group" column can be selected).
    """
    # Check inputs: fail early with a clear message if a column is missing
    required_variables = [date_column, metric, "PersonId"]
    for var in required_variables:
        if var not in data.columns:
            raise ValueError(f"{var} is not in the data")

    # Work on a narrow copy so the caller's DataFrame is left untouched
    myTable = data.rename(columns={hrvar: "group"})
    myTable = myTable[["PersonId", date_column, "group", metric]].copy()
    myTable[date_column] = pd.to_datetime(myTable[date_column], format=date_format)

    # Number of distinct people in each group, broadcast back to every row
    myTable["Employee_Count"] = myTable.groupby("group")["PersonId"].transform("nunique")

    # Drop groups that are too small
    myTable = myTable[myTable["Employee_Count"] >= mingroup]

    # Average the metric (and the constant employee count) per date and group
    myTable = (
        myTable
        .groupby([date_column, "group"])
        .agg({"Employee_Count": "mean", metric: "mean"})
        .reset_index()
    )

    return myTable
def create_trend_viz(
    data: pd.DataFrame,
    metric: str,
    palette,
    hrvar: str,
    mingroup,
    legend_title: str,
    date_column: str,
    date_format: str
    ):
    """
    Name
    ----
    create_trend_viz

    Description
    -----------
    Draw a heatmap of the mean of a metric, with one row per group and one
    column per date.

    Parameters
    ---------
    data : pandas dataframe
        Input data, passed to `create_trend_calc` and `extract_date_range`.
    metric : str
        Name of the column to plot; underscores are replaced by spaces in the title.
    palette : list
        List of colours passed to seaborn as the heatmap colour map.
    hrvar : str
        Name of the grouping column, also shown in the subtitle.
    mingroup : int
        Minimum group size, passed to `create_trend_calc`.
    legend_title : str
        Label of the colour bar.
    date_column : str
        Name of the date column.
    date_format : str
        Date format, passed to `create_trend_calc`.

    Returns
    ------
    The matplotlib figure containing the heatmap.
    """
    myTable = create_trend_calc(data, metric, hrvar, mingroup, date_column, date_format)

    # Wide layout for the heatmap: groups as rows, dates as columns
    heat_data = myTable.pivot(index="group", columns=date_column, values=metric)

    # Clean labels for plotting
    clean_nm = metric.replace("_", " ")
    title_text = f"{clean_nm} Hotspots"
    subtitle_text = f'By {hrvar}'
    caption_text = extract_date_range(data, return_type = 'text')

    # Setup plot size
    fig, ax = plt.subplots(figsize=(7, 4))

    # Remove tick marks on all sides, for both major and minor ticks
    ax.tick_params(
        which='both',
        top=False,
        bottom=False,
        left=False,
        right=False
    )

    # Create heatmap plot on the axes created above.
    # Tick labels are taken from the pivot's own columns so they always line
    # up with the plotted cells; `.date` drops the time part of each label.
    sns.heatmap(
        data = heat_data,
        cmap = palette,
        cbar_kws = {"label": legend_title},
        xticklabels = heat_data.columns.date,
        ax = ax
    )

    # Reformat axis tick labels
    ax.xaxis.set_tick_params(labelsize = 9, rotation=45)
    ax.yaxis.set_tick_params(labelsize = 9)

    # Remove axis labels
    ax.set_xlabel('')
    ax.set_ylabel('')

    # Accent line above the plot, positioned in figure coordinates
    ax.plot(
        [-0.08, .9],
        [0.9, 0.9],
        transform = fig.transFigure,
        clip_on = False,
        color = '#fe7f4f',
        linewidth = .6
    )

    # Small accent rectangle hanging below the left end of the line
    # (negative height extends the rectangle downwards)
    ax.add_patch(
        plt.Rectangle(
            (-0.08, 0.9),
            0.05,
            -0.025,
            facecolor = '#fe7f4f',
            transform = fig.transFigure,
            clip_on = False,
            linewidth = 0
        )
    )

    # Set title
    ax.text(
        x = -0.08, y = 1.00,
        s = title_text,
        transform = fig.transFigure,
        ha = 'left',
        fontsize = 13,
        weight = 'bold',
        alpha = .8
    )

    # Set subtitle
    ax.text(
        x = -0.08, y = 0.95,
        s = subtitle_text,
        transform = fig.transFigure,
        ha = 'left',
        fontsize = 11,
        alpha = .8
    )

    # Set caption
    ax.text(
        x = -0.08, y = -0.12,
        s = caption_text,
        transform = fig.transFigure,
        ha = 'left',
        fontsize = 9,
        alpha = .7
    )

    # return the plot object
    return fig