Source code for vivainsights.pq_data

# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
This module provides `load_pq_data()`, which returns a data frame containing the bundled person query dataset.
"""
import pkg_resources
import pandas as pd
import os

def load_pq_data():
    """
    Load the person query dataset that ships with the package.

    The CSV is looked up as a package resource, first at
    ``data/pq_data.csv`` and then at ``../data/pq_data.csv`` relative to
    this module. After loading, a few compatibility adjustments are applied
    so that tests and downstream functions find the columns they expect.

    Returns
    -------
    pandas.DataFrame
        The person query data, with these adjustments:

        - ``Meeting_hours`` is added when absent, copied from
          ``Meeting_and_call_hours`` if available, otherwise ``0.0``.
        - ``Multitasking_hours`` is added when absent, copied from the first
          available column among ``After_hours_collaboration_hours``,
          ``Channel_message_posts``, ``Emails_sent`` and
          ``Collaboration_hours``, otherwise ``0.0``.
        - Missing ``LevelDesignation`` values are filled from ``Level``
          (when that column exists) and then with ``'Unknown'``; missing
          ``Organization`` values are filled with ``'Unknown'``. Both
          columns are cast to ``str``.

    Raises
    ------
    FileNotFoundError
        If the CSV cannot be found at either resource location.
    """
    resource_candidates = ('data/pq_data.csv', '../data/pq_data.csv')
    resource_path = next(
        (
            path for path in resource_candidates
            if pkg_resources.resource_exists(__name__, path)
        ),
        None,
    )
    if resource_path is None:
        # Fail loudly here: continuing would only crash later with an
        # unrelated "stream referenced before assignment" error.
        raise FileNotFoundError(
            'Error: please report issue to repo maintainer'
        )

    stream = pkg_resources.resource_stream(__name__, resource_path)
    try:
        out = pd.read_csv(stream, encoding='utf-8')
    finally:
        # Address `ResourceWarning unclosed file` issue, including when
        # parsing fails part-way through.
        stream.close()

    # ------------------------------------------------------------------
    # Compatibility shims for tests and downstream functions
    # ------------------------------------------------------------------

    # 1) Ensure expected meeting columns exist.
    #    Some tests refer to 'Meeting_hours' while the dataset provides
    #    'Meeting_and_call_hours'. Create a safe alias when needed.
    if 'Meeting_hours' not in out.columns:
        if 'Meeting_and_call_hours' in out.columns:
            out['Meeting_hours'] = out['Meeting_and_call_hours']
        else:
            # Fallback to zeros to maintain numeric type
            out['Meeting_hours'] = 0.0

    # 2) Provide a reasonable numeric proxy for 'Multitasking_hours' if
    #    missing. Candidates are listed in priority order; the first one
    #    present in the data wins.
    if 'Multitasking_hours' not in out.columns:
        proxy_candidates = (
            'After_hours_collaboration_hours',
            'Channel_message_posts',
            'Emails_sent',
            'Collaboration_hours',
        )
        proxy = next(
            (col for col in proxy_candidates if col in out.columns),
            None,
        )
        out['Multitasking_hours'] = out[proxy] if proxy is not None else 0.0

    # 3) Stabilize common HR grouping columns to reduce empty groups / NaNs
    if 'LevelDesignation' in out.columns:
        if 'Level' in out.columns:
            out['LevelDesignation'] = out['LevelDesignation'].fillna(out['Level'])
        out['LevelDesignation'] = out['LevelDesignation'].fillna('Unknown').astype(str)
    if 'Organization' in out.columns:
        out['Organization'] = out['Organization'].fillna('Unknown').astype(str)

    return out