# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
The `create_trend` function provides a week by week view of a selected Viva Insights metric,
allowing you to either return a week by week heatmap bar plot or a summary table.
By default, `create_trend` returns a week by week heatmap bar plot, highlighting the points intime with most activity.
Additional options available to return a summary table.
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
from vivainsights.extract_date_range import extract_date_range
from vivainsights import totals_col
[docs]
def create_trend(data: pd.DataFrame,
metric: str,
palette = [
"#0c3c44",
"#1d627e",
"#34b1e2",
"#bfe5ee",
"#fcf0eb",
"#fbdacd",
"#facebc",
"#fe7f4f"
],
hrvar: str = "Organization",
mingroup = 5,
return_type: str = "plot",
legend_title: str = "Hours",
date_column: str = "MetricDate",
date_format: str = "%Y-%m-%d"):
"""
Name
----
create_trend
Description
-----------
This module provides a week by week view of a selected Viva Insights metric.
By default returns a week by week heatmap bar plot, highlighting the points intime with most activity.
Additional options available to return a summary table.
Parameters
---------
data : panda dataframe
The input data as a pandas DataFrame.
metric : str
The metric parameter is a string that represents the column name in the data DataFrame that contains the values to be plotted or analyzed. This could be any numerical metric such as sales, revenue, or number of hours worked.
palette : list
The `palette` parameter is a list of colors that will be used to represent different groups in the trend plot. Each color in the list corresponds to a different group. By default, the palette includes 8 colors, but you can modify it to include more or fewer colors if needed.
hrvar : str
hrvar is a string parameter that represents the variable used for grouping the data. In this case, it is used to group the data by organization. Defaults to Organization mingroup: The `mingroup` parameter is used to specify the minimum number of groups that should be present in the data for the trend analysis. If the number of unique values in the `hrvar` column is less than `mingroup`, the function will raise an error. Defaults to 5
return_type : str
The `return_type` parameter determines the type of output that the function will return. It can have two possible values:. Defaults to plot
legend_title : str
The title for the legend in the plot. It is used to label the different categories or groups in the data. Defaults to Hours
date_column : str
The name of the column in the DataFrame that contains the dates for the trend analysis. Defaults to MetricDate
date_format : str
The `date_format` parameter is used to specify the format of the dates in the `date_column` of the input data. It should be a string that follows the syntax of the Python `datetime` module's `strftime` function. This allows you to specify how the dates are formatted in the. Defaults to %Y-%m-%d
Returns
------
The function `create_trend` returns either a table or a plot, depending on the value of the `return_type` parameter.
Example
-------
>>> import vivainsights as vi
>>> pq_data = vi.load_pq_data()
>>> create_trend(data = pq_data, metric = "Collaboration_hours", hrvar = "LevelDesignation")
"""
if(hrvar is None):
data = totals_col(data)
hrvar = "Total"
# Return the table or the plot or raise an error
if return_type == "table":
myTable = create_trend_calc(data, metric, hrvar, mingroup, date_column, date_format)
myTable_return = myTable.pivot(index="group", columns=date_column, values=metric)
return myTable_return
elif return_type == "plot":
return create_trend_viz(data, metric, palette, hrvar, mingroup, legend_title, date_column, date_format)
else:
raise ValueError("Please enter a valid input for return_type.")
[docs]
def create_trend_calc(data, metric, hrvar, mingroup, date_column, date_format):
"""
Name
----
create_trend_calc
Description
-----------
This function creates a trend calculation by grouping data by a specified variable and calculating
the mean of a specified metric over time.
"""
# Check inputs
required_variables = [date_column, metric, "PersonId"]
# Error message if variables are not present
# Nothing happens if all present
for var in required_variables:
if var not in data.columns:
raise ValueError(f"{var} is not in the data")
# Clean metric name
clean_nm = metric.replace("_", " ")
# Convert Date to datetime and rename hrvar to group
data[date_column] = pd.to_datetime(data[date_column], format=date_format)
data = data.rename(columns={hrvar: "group"})
# Select relevant columns and group by group
myTable = data[["PersonId", date_column, "group", metric]]
myTable = myTable.groupby("group")
# Calculate employee count and filter by mingroup
myTable = myTable.apply(lambda x: x.assign(Employee_Count = x["PersonId"].nunique()))
myTable = myTable[myTable["Employee_Count"] >= mingroup]
# Group by date and group and calculate mean metric and employee count
myTable.reset_index(drop = True, inplace = True)
myTable = myTable.groupby([date_column, "group"]).agg({"Employee_Count": "mean", metric: "mean"}).reset_index()
return myTable
[docs]
def create_trend_viz(
data: pd.DataFrame,
metric: str,
palette,
hrvar: str,
mingroup,
legend_title: str,
date_column: str,
date_format: str
):
"""
Name
----
create_trend_viz
Description
-----------
This function creates a heatmap visualization of trends in a given metric by a specified variable
over time.
"""
myTable = create_trend_calc(data, metric, hrvar, mingroup, date_column, date_format)
myTable_plot = myTable[[date_column, "group", metric]]
# myTable_plot[date_column] = pd.to_datetime(myTable[date_column], format=date_format)
# myTable_plot[date_column] = pd.to_datetime(myTable[date_column], format=date_format).dt.date
# Clean labels for plotting
clean_nm = metric.replace("_", " ")
title_text = f"{clean_nm} Hotspots"
subtitle_text = f'By {hrvar}'
caption_text = extract_date_range(data, return_type = 'text')
# Create the plot object
# Setup plot size.
fig, ax = plt.subplots(figsize=(7, 4))
# Remove tick marks
ax.tick_params(
which='both', # Both major and minor ticks are affected
top=False, # Remove ticks from the top
bottom=False, # Remove ticks from the bottom
left=False, # Remove ticks from the left
right=False # Remove ticks from the right
)
# Create heatmap plot
sns.heatmap(
data = myTable_plot.pivot(index="group", columns=date_column, values=metric),
cmap = palette,
cbar_kws={"label": legend_title},
xticklabels= myTable_plot[date_column].dt.date.unique()
)
# Reformat x-axis tick labels
ax.xaxis.set_tick_params(labelsize = 9, rotation=45)
ax.yaxis.set_tick_params(labelsize = 9)
# ax.xaxis.set_major_formatter(mdates.DateFormatter('%d %b %y'))
# Remove axis labels
ax.set_xlabel('')
ax.set_ylabel('')
ax.plot(
[-0.08, .9], # Set width of line, previously [-0.08, .9]
[0.9, 0.9], # Set height of line
transform = fig.transFigure, # Set location relative to plot
clip_on = False,
color = '#fe7f4f',
linewidth = .6
)
ax.add_patch(
plt.Rectangle(
(-0.08, 0.9), # Set location of rectangle by lower left corner, previously [-0.08, .9]
0.05, # Width of rectangle
-0.025, # Height of rectangle
facecolor = '#fe7f4f',
transform = fig.transFigure,
clip_on = False,
linewidth = 0
)
)
# Set title
ax.text(
x = -0.08, y = 1.00,
s = title_text,
transform = fig.transFigure,
ha = 'left',
fontsize = 13,
weight = 'bold',
alpha = .8
)
# Set subtitle
ax.text(
x = -0.08, y = 0.95,
s = subtitle_text,
transform = fig.transFigure,
ha = 'left',
fontsize = 11,
alpha = .8
)
# Set caption
ax.text(x=-0.08, y=-0.12, s=caption_text, transform=fig.transFigure, ha='left', fontsize=9, alpha=.7)
# return the plot object
return fig
"""Legacy
# plot_object.set_title(f"{clean_nm}\nHotspots by {hrvar.lower()}")
# plot_object.set_xlabel(date_column)
# plot_object.set_ylabel(hrvar)
"""