Source code for vivainsights.p2p_data_sim


# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
Generate an person-to-person query / edgelist based on the graph according to the Watts-Strogatz
small-world network model. Organizational data fields are also simulated for `Organization`, `LevelDesignation`, and `City`
data frame with the same column structure as a person-to-person flexible query.
This has an edgelist structure and can be used directly as an input to `network_p2p()`.
"""
import igraph
import pandas as pd

[docs] def p2p_data_sim(dim=1, size=300, nei=5, p=0.05): graph = igraph.Graph.Watts_Strogatz(dim=dim, size=size, nei=nei, p=p) edgelist = graph.get_edgelist() df = pd.DataFrame(edgelist, columns=["PrimaryCollaborator_PersonId", "SecondaryCollaborator_PersonId"]) def add_cat(x, type): if type == "Organization": if x % 7 == 0: return "Org A" elif x % 6 == 0: return "Org B" elif x % 5 == 0: return "Org C" elif x % 4 == 0: return "Org D" elif x % 3 == 0: return "Org E" elif x < 100: return "Org F" elif x % 2 == 0: return "Org G" else: return "Org H" elif type == "LevelDesignation": return "Level " + str(x)[0] elif type == "City": if x % 3 == 0: return "City A" elif x % 2 == 0: return "City B" else: return "City C" df["PrimaryCollaborator_Organization"] = df["PrimaryCollaborator_PersonId"].apply(lambda x: add_cat(x, "Organization")) df["SecondaryCollaborator_Organization"] = df["SecondaryCollaborator_PersonId"].apply(lambda x: add_cat(x, "Organization")) df["PrimaryCollaborator_LevelDesignation"] = df["PrimaryCollaborator_PersonId"].apply(lambda x: add_cat(x, "LevelDesignation")) df["SecondaryCollaborator_LevelDesignation"] = df["SecondaryCollaborator_PersonId"].apply(lambda x: add_cat(x, "LevelDesignation")) df["PrimaryCollaborator_City"] = df["PrimaryCollaborator_PersonId"].apply(lambda x: add_cat(x, "City")) df["SecondaryCollaborator_City"] = df["SecondaryCollaborator_PersonId"].apply(lambda x: add_cat(x, "City")) df["PrimaryCollaborator_PersonId"] = df["PrimaryCollaborator_PersonId"].apply(lambda x: f"SIM_ID_{x}") df["SecondaryCollaborator_PersonId"] = df["SecondaryCollaborator_PersonId"].apply(lambda x: f"SIM_ID_{x}") df["StrongTieScore"] = 1 return df