Source code for vivainsights.p2p_data_sim
# --------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See LICENSE.txt in the project root for license information.
# --------------------------------------------------------------------------------------------
"""
Simulate a person-to-person network using the Watts-Strogatz model.
Organizational data fields are also simulated for `Organization`, `LevelDesignation`, and `City`.
The result is a data frame with the same column structure as a person-to-person flexible query.
This has an edgelist structure and can be used directly as an input to `network_p2p()`.
"""
__all__ = ['p2p_data_sim']
import igraph
import pandas as pd
[docs]
def p2p_data_sim(dim=1, size=300, nei=5, p=0.05):
    """Simulate a person-to-person network dataset.

    Generate a synthetic person-to-person edgelist using the Watts-Strogatz
    small-world model. Organizational attributes (``Organization``,
    ``LevelDesignation``, ``City``) are derived deterministically from each
    node's integer index, for both the primary and the secondary
    collaborator of every edge. The output can be passed directly to
    ``network_p2p()``.

    Parameters
    ----------
    dim : int, default 1
        Dimension of the Watts-Strogatz lattice.
    size : int, default 300
        Size of the lattice along each dimension. The network has
        ``size ** dim`` nodes, so this equals the node count only when
        ``dim=1``.
    nei : int, default 5
        Lattice distance (number of steps) within which two nodes are
        connected before rewiring.
    p : float, default 0.05
        Rewiring probability.

    Returns
    -------
    pandas.DataFrame
        One row per edge, with columns (in this order):
        ``PrimaryCollaborator_PersonId``, ``SecondaryCollaborator_PersonId``,
        ``PrimaryCollaborator_Organization``,
        ``SecondaryCollaborator_Organization``,
        ``PrimaryCollaborator_LevelDesignation``,
        ``SecondaryCollaborator_LevelDesignation``,
        ``PrimaryCollaborator_City``, ``SecondaryCollaborator_City`` and
        ``StrongTieScore``. Person IDs are strings of the form
        ``SIM_ID_<node index>``; ``StrongTieScore`` is always ``1``.

    Examples
    --------
    Generate a small simulated network:

    >>> import vivainsights as vi
    >>> sim = vi.p2p_data_sim(size=50)

    Customize the Watts-Strogatz parameters (a 10 x 10 lattice, 100 nodes):

    >>> sim = vi.p2p_data_sim(size=10, dim=2, nei=3, p=0.1)
    """
    roles = ("PrimaryCollaborator", "SecondaryCollaborator")

    graph = igraph.Graph.Watts_Strogatz(dim=dim, size=size, nei=nei, p=p)
    df = pd.DataFrame(
        graph.get_edgelist(),
        columns=[f"{role}_PersonId" for role in roles],
    )

    def simulate_organization(node):
        # Rules are checked in order and the first match wins, so e.g. a
        # multiple of both 7 and 6 is always "Org A".
        divisor_rules = (
            (7, "Org A"),
            (6, "Org B"),
            (5, "Org C"),
            (4, "Org D"),
            (3, "Org E"),
        )
        for divisor, label in divisor_rules:
            if node % divisor == 0:
                return label
        if node < 100:
            return "Org F"
        return "Org G" if node % 2 == 0 else "Org H"

    def simulate_level(node):
        # The level is the leading digit of the node index.
        return "Level " + str(node)[0]

    def simulate_city(node):
        if node % 3 == 0:
            return "City A"
        if node % 2 == 0:
            return "City B"
        return "City C"

    # Attribute-major, role-minor iteration keeps the output column order:
    # both Organization columns, then both LevelDesignation, then both City.
    simulators = (
        ("Organization", simulate_organization),
        ("LevelDesignation", simulate_level),
        ("City", simulate_city),
    )
    for attribute, simulate in simulators:
        for role in roles:
            df[f"{role}_{attribute}"] = df[f"{role}_PersonId"].apply(simulate)

    # Convert the integer node indices to string IDs only after the
    # attributes have been derived, since the simulators need the integers.
    for role in roles:
        id_column = f"{role}_PersonId"
        df[id_column] = df[id_column].apply(lambda node: f"SIM_ID_{node}")

    df["StrongTieScore"] = 1
    return df