Method 3: Difference-in-Differences (DiD)
Overview
Difference-in-Differences (DiD) exploits variation in the timing of treatment implementation across groups or regions. By comparing changes over time between treated and control groups, DiD nets out time-invariant differences between the groups as well as common shocks that affect both groups equally.
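A small worked example with made-up numbers makes the logic concrete: suppose treated teams average 20 tickets per week before the rollout and 28 after (a change of +8), while control teams move from 21 to 24 over the same window (+3). The DiD estimate is 8 - 3 = +5 tickets per week: the part of the treated group's improvement not explained by the trend both groups shared.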
When to Use Difference-in-Differences
Key Advantages:
- Controls for unobserved confounders that are constant over time
- Natural experiment design when treatment timing varies
- Policy evaluation strength for interventions with staggered rollout
- Intuitive interpretation as a "difference of differences"
Best for:
- Interventions rolled out across teams/regions at different times
- Pre/post comparisons with control groups
- Policy changes with clear implementation dates
- When treatment assignment is correlated with time-invariant factors
Key Assumptions:
- Parallel trends: Control and treatment groups would follow similar trends without treatment
- No anticipation effects: Behavior doesn't change before actual treatment
- Stable composition: Group composition remains stable over time (a quick check is sketched below)
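A minimal sketch of a composition check, assuming the same panel columns used in the examples below (team_id, date, copilot_usage); it counts distinct units per group in each period and flags noticeable swings:

import pandas as pd

def check_composition_stability(df, unit_col='team_id', time_col='date', treatment_col='copilot_usage'):
    # Distinct units per (period, group); a stable panel has roughly constant counts
    counts = (df.groupby([time_col, treatment_col])[unit_col]
                .nunique()
                .unstack(treatment_col))
    # Relative deviation of each period's group size from that group's median size
    drift = (counts - counts.median()).abs() / counts.median()
    unstable_periods = drift[(drift > 0.10).any(axis=1)]
    if len(unstable_periods) > 0:
        print(f"⚠️ {len(unstable_periods)} period(s) with >10% change in group composition")
    else:
        print("✅ Group composition looks stable across periods")
    return counts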
Data Structure and Preparation
DiD requires panel data with observations before and after treatment for both treated and control groups.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import statsmodels.formula.api as smf
from scipy import stats
def prepare_did_data(df, unit_col, time_col, treatment_col, outcome_col, treatment_date):
"""
Prepare data for Difference-in-Differences analysis
DiD Setup:
- Panel data: Multiple observations per unit over time
- Treatment timing: Clear before/after periods
- Control group: Never treated or treated later
Parameters:
- unit_col: Identifier for units (teams, individuals, regions)
- time_col: Time variable (date, period, month)
- treatment_col: Binary treatment indicator
- outcome_col: Outcome variable
- treatment_date: When treatment began
"""
df_did = df.copy()
# Convert time column if needed
if df_did[time_col].dtype == 'object':
df_did[time_col] = pd.to_datetime(df_did[time_col])
# Create before/after treatment indicator
df_did['post_treatment'] = (df_did[time_col] >= treatment_date).astype(int)
# Create treatment-time interaction
df_did['treated_post'] = df_did[treatment_col] * df_did['post_treatment']
print(f"=== DIFFERENCE-IN-DIFFERENCES DATA SETUP ===")
print(f"Units: {df_did[unit_col].nunique()}")
print(f"Time periods: {df_did[time_col].nunique()}")
print(f"Treatment date: {treatment_date}")
# Sample composition
treated_units = df_did[df_did[treatment_col] == 1][unit_col].nunique()
control_units = df_did[df_did[treatment_col] == 0][unit_col].nunique()
pre_periods = (df_did[time_col] < treatment_date).sum()
post_periods = (df_did[time_col] >= treatment_date).sum()
print(f"Treated units: {treated_units}")
print(f"Control units: {control_units}")
print(f"Pre-treatment observations: {pre_periods}")
print(f"Post-treatment observations: {post_periods}")
# Check for balanced panel
expected_obs = df_did[unit_col].nunique() * df_did[time_col].nunique()
actual_obs = len(df_did)
panel_balance = actual_obs / expected_obs
print(f"Panel balance: {panel_balance:.1%} of expected observations")
if panel_balance < 0.9:
print("β οΈ Warning: Unbalanced panel - consider investigating missing data")
# Data quality checks
quality_issues = []
# Check for units that change treatment status
treatment_switches = df_did.groupby(unit_col)[treatment_col].nunique()
switchers = (treatment_switches > 1).sum()
if switchers > 0:
quality_issues.append(f"{switchers} units change treatment status")
# Check for missing outcomes
missing_outcomes = df_did[outcome_col].isnull().sum()
if missing_outcomes > 0:
quality_issues.append(f"{missing_outcomes} missing outcome values")
if quality_issues:
print("Data quality issues:")
for issue in quality_issues:
print(f" β οΈ {issue}")
return df_did
# Example data preparation
# This assumes you have panel data with team_id, date, copilot_usage, tickets_per_week
treatment_start = pd.to_datetime('2024-03-01')
df_did = prepare_did_data(
df, 'team_id', 'date', 'copilot_usage', 'tickets_per_week', treatment_start
)
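The call above assumes a panel with team_id, date, copilot_usage, and tickets_per_week already exists. For a self-contained dry run, a hypothetical panel can be simulated along these lines (run before the prepare_did_data call above); every team name, date, and effect size here is illustrative, not real data:

import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
dates = pd.date_range('2023-09-01', '2024-08-01', freq='MS')   # monthly observations
teams = [f'team_{i}' for i in range(40)]

rows = []
for team in teams:
    treated = int(team in teams[:20])          # first 20 teams adopt Copilot in March 2024
    baseline = rng.normal(30, 5)               # team-specific baseline throughput
    for t, date in enumerate(dates):
        post = int(date >= pd.Timestamp('2024-03-01'))
        rows.append({
            'team_id': team,
            'date': date,
            'copilot_usage': treated,
            # common linear trend + assumed true effect of +3 tickets/week + noise
            'tickets_per_week': baseline + 0.3 * t + 3.0 * treated * post + rng.normal(0, 2),
        })
df = pd.DataFrame(rows)

Because both groups share the same 0.3-per-month trend by construction, parallel trends holds in this simulated panel, which makes it useful for checking that the code below behaves as expected.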
Pre-Treatment Trends Analysis
The key assumption in DiD is parallel trends - that treated and control groups would have followed similar trends in the absence of treatment.
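In potential-outcomes notation, with Y_it(0) the outcome unit i would have had without treatment, parallel trends requires
E[Y_it(0) - Y_is(0) | treated] = E[Y_it(0) - Y_is(0) | control]
for a post-treatment period t and pre-treatment period s: the control group's observed change stands in for the treated group's unobserved counterfactual change. The assumption itself cannot be tested directly, but similar pre-treatment trends make it more credible, which is what the function below examines.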
def test_parallel_trends(df_did, unit_col, time_col, treatment_col, outcome_col,
treatment_date, periods_before=6):
"""
Test the parallel trends assumption
Parallel trends is the identifying assumption of DiD:
- Control group shows what would have happened to treatment group
- Requires similar pre-treatment trends between groups
- Tested using pre-treatment data only
"""
# Focus on pre-treatment period
pre_treatment = df_did[df_did[time_col] < treatment_date].copy()
if len(pre_treatment) == 0:
print("β No pre-treatment data available for trends test")
return None
# Limit to recent pre-treatment periods for relevance
periods = sorted(pre_treatment[time_col].unique())[-periods_before:]
pre_treatment = pre_treatment[pre_treatment[time_col].isin(periods)]
print(f"=== PARALLEL TRENDS ANALYSIS ===")
print(f"Pre-treatment periods analyzed: {len(periods)}")
print(f"Date range: {min(periods)} to {max(periods)}")
# Calculate group means by period
trends_data = pre_treatment.groupby([time_col, treatment_col])[outcome_col].mean().reset_index()
trends_pivot = trends_data.pivot(index=time_col, columns=treatment_col, values=outcome_col)
trends_pivot.columns = ['Control', 'Treatment']
# Visual trends test
plt.figure(figsize=(12, 6))
plt.plot(trends_pivot.index, trends_pivot['Control'], 'o-', label='Control Group', linewidth=2)
plt.plot(trends_pivot.index, trends_pivot['Treatment'], 's-', label='Treatment Group', linewidth=2)
plt.axvline(x=treatment_date, color='red', linestyle='--', alpha=0.7, label='Treatment Start')
plt.xlabel('Date')
plt.ylabel(outcome_col.replace('_', ' ').title())
plt.title('Pre-Treatment Trends: Parallel Trends Test')
plt.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
# Statistical test for parallel trends
# Create time trend variable (periods from first observation)
pre_treatment['time_trend'] = (pre_treatment[time_col] - pre_treatment[time_col].min()).dt.days
# Test if treatment group has different time trend
trend_formula = f'{outcome_col} ~ time_trend * {treatment_col}'
trend_model = smf.ols(trend_formula, data=pre_treatment).fit()
interaction_coef = trend_model.params.get(f'time_trend:{treatment_col}', 0)
interaction_pvalue = trend_model.pvalues.get(f'time_trend:{treatment_col}', 1)
print(f"\n=== PARALLEL TRENDS TEST ===")
print(f"Time trend interaction coefficient: {interaction_coef:.4f}")
print(f"P-value: {interaction_pvalue:.4f}")
if interaction_pvalue > 0.05:
print("β
PASS: No evidence against parallel trends (p > 0.05)")
trends_valid = True
else:
print("β οΈ CONCERN: Evidence of different pre-trends (p β€ 0.05)")
print(" Consider: longer pre-period, group-specific trends, or alternative methods")
trends_valid = False
# Calculate trend differences
control_trend = trends_pivot['Control'].pct_change().mean()
treatment_trend = trends_pivot['Treatment'].pct_change().mean()
trend_difference = treatment_trend - control_trend
print(f"\nPre-treatment growth rates:")
print(f"Control group: {control_trend:.1%} per period")
print(f"Treatment group: {treatment_trend:.1%} per period")
print(f"Difference: {trend_difference:.1%} per period")
return {
'trends_valid': trends_valid,
'interaction_coef': interaction_coef,
'p_value': interaction_pvalue,
'trend_difference': trend_difference,
'trends_data': trends_pivot
}
# Test parallel trends assumption
trends_test = test_parallel_trends(
df_did, 'team_id', 'date', 'copilot_usage', 'tickets_per_week', treatment_start
)
Basic Difference-in-Differences Estimation
The core DiD specification compares the change in outcomes between treatment and control groups.
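In regression form, the same comparison is usually written as
Y_it = β0 + β1·treated_i + β2·post_t + β3·(treated_i × post_t) + ε_it
where β3, the coefficient on the treated_post interaction constructed earlier, is the DiD estimate; β1 absorbs fixed pre-existing differences between the groups and β2 absorbs the common change over time.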
def estimate_basic_did(df_did, treatment_col, outcome_col, unit_col='team_id'):
"""
Basic two-period Difference-in-Differences estimation
DiD Formula: (Y_treated_post - Y_treated_pre) - (Y_control_post - Y_control_pre)
Interpretation:
- First difference: Change in treatment group
- Second difference: Change in control group (counterfactual)
- DiD effect: Treatment impact above natural trends
"""
# Calculate group means by period
group_means = df_did.groupby([treatment_col, 'post_treatment'])[outcome_col].mean().reset_index()
means_pivot = group_means.pivot(index='post_treatment', columns=treatment_col, values=outcome_col)
means_pivot.columns = ['Control', 'Treatment']
means_pivot.index = ['Pre-treatment', 'Post-treatment']
print(f"=== BASIC DIFFERENCE-IN-DIFFERENCES ===")
print("\nGroup means by period:")
print(means_pivot.round(3))
# Calculate differences
control_change = means_pivot.loc['Post-treatment', 'Control'] - means_pivot.loc['Pre-treatment', 'Control']
treatment_change = means_pivot.loc['Post-treatment', 'Treatment'] - means_pivot.loc['Pre-treatment', 'Treatment']
did_effect = treatment_change - control_change
print(f"\n=== DIFFERENCE CALCULATIONS ===")
print(f"Control group change: {control_change:.3f}")
print(f"Treatment group change: {treatment_change:.3f}")
print(f"Difference-in-Differences: {did_effect:.3f}")
# Regression-based DiD (more flexible for standard errors)
did_formula = f'{outcome_col} ~ {treatment_col} + post_treatment + treated_post'
did_model = smf.ols(did_formula, data=df_did).fit(cov_type='cluster', cov_kwds={'groups': df_did[unit_col]})
# Extract results
did_coef = did_model.params['treated_post']
did_se = did_model.bse['treated_post']
did_ci = did_model.conf_int().loc['treated_post']
did_pvalue = did_model.pvalues['treated_post']
print(f"\n=== REGRESSION-BASED DiD ===")
print(f"Treatment effect: {did_coef:.3f}")
print(f"Standard error: {did_se:.3f}")
print(f"95% CI: [{did_ci[0]:.3f}, {did_ci[1]:.3f}]")
print(f"P-value: {did_pvalue:.4f}")
# Effect size interpretation
outcome_std = df_did[outcome_col].std()
effect_size = did_coef / outcome_std
print(f"Effect size (Cohen's d): {effect_size:.3f}")
if abs(effect_size) < 0.2:
magnitude = "small"
elif abs(effect_size) < 0.5:
magnitude = "medium"
else:
magnitude = "large"
print(f"Effect magnitude: {magnitude}")
# Statistical significance
if did_pvalue < 0.001:
significance = "highly significant (p < 0.001)"
elif did_pvalue < 0.01:
significance = "very significant (p < 0.01)"
elif did_pvalue < 0.05:
significance = "significant (p < 0.05)"
else:
significance = "not significant (p β₯ 0.05)"
print(f"Statistical significance: {significance}")
return {
'did_effect': did_coef,
'se': did_se,
'ci': [did_ci[0], did_ci[1]],
'p_value': did_pvalue,
'effect_size': effect_size,
'model': did_model,
'group_means': means_pivot
}
# Estimate basic DiD effect
basic_did_results = estimate_basic_did(df_did, 'copilot_usage', 'tickets_per_week')
Event Study Analysis
Event studies examine treatment effects period-by-period around the intervention, providing detailed insights into effect dynamics.
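The specification estimated below takes the form
Y_it = α_i + λ_t + Σ_{k ≠ -1} δ_k · 1[event_time_it = k] · treated_i + ε_it
with unit fixed effects α_i, event-time fixed effects λ_t, and the period just before treatment (t = -1) omitted as the reference. The δ_k trace out the effect period by period; if parallel trends holds, the pre-treatment δ_k should be close to zero.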
def event_study_analysis(df_did, time_col, treatment_col, outcome_col, unit_col,
treatment_date, periods_before=6, periods_after=6):
"""
Event study analysis for dynamic treatment effects
Event studies show:
- Pre-treatment effects (should be zero if parallel trends hold)
- Treatment effect evolution over time
- Whether effects persist, grow, or fade
"""
df_event = df_did.copy()
# Create event time variable (periods relative to treatment)
if df_event[time_col].dtype == 'datetime64[ns]':
# For datetime, use months
df_event['event_time'] = ((df_event[time_col] - treatment_date).dt.days / 30).round().astype(int)
else:
# For numeric time
treatment_period = df_event[df_event[time_col] >= treatment_date][time_col].min()
df_event['event_time'] = df_event[time_col] - treatment_period
# Focus on relevant event window
event_window = (df_event['event_time'] >= -periods_before) & (df_event['event_time'] <= periods_after)
df_event = df_event[event_window].copy()
# Create period-specific treatment indicators
# Omit t=-1 as reference period
for t in range(-periods_before, periods_after + 1):
if t != -1: # Reference period
df_event[f'treat_t{t}'] = ((df_event['event_time'] == t) &
(df_event[treatment_col] == 1)).astype(int)
# Event study regression
treatment_vars = [f'treat_t{t}' for t in range(-periods_before, periods_after + 1) if t != -1]
formula = f'{outcome_col} ~ {" + ".join(treatment_vars)} + C({unit_col}) + C(event_time)'
print(f"=== EVENT STUDY ANALYSIS ===")
print(f"Event window: t-{periods_before} to t+{periods_after}")
print(f"Reference period: t-1")
print(f"Observations in window: {len(df_event)}")
event_model = smf.ols(formula, data=df_event).fit(cov_type='cluster',
cov_kwds={'groups': df_event[unit_col]})
# Extract coefficients and confidence intervals
event_results = []
for t in range(-periods_before, periods_after + 1):
if t == -1:
# Reference period
coef, se, ci_low, ci_high = 0, 0, 0, 0
else:
var_name = f'treat_t{t}'
if var_name in event_model.params:
coef = event_model.params[var_name]
se = event_model.bse[var_name]
ci = event_model.conf_int().loc[var_name]
ci_low, ci_high = ci[0], ci[1]
else:
coef, se, ci_low, ci_high = 0, 0, 0, 0
event_results.append({
'event_time': t,
'coefficient': coef,
'se': se,
'ci_low': ci_low,
'ci_high': ci_high
})
event_df = pd.DataFrame(event_results)
# Plot event study results
plt.figure(figsize=(12, 6))
plt.plot(event_df['event_time'], event_df['coefficient'], 'o-', linewidth=2, markersize=6)
plt.fill_between(event_df['event_time'], event_df['ci_low'], event_df['ci_high'],
alpha=0.3, label='95% CI')
plt.axhline(y=0, color='black', linestyle='-', alpha=0.3)
plt.axvline(x=0, color='red', linestyle='--', alpha=0.7, label='Treatment Start')
plt.xlabel('Periods Relative to Treatment')
plt.ylabel(f'Effect on {outcome_col.replace("_", " ").title()}')
plt.title('Event Study: Dynamic Treatment Effects')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
# Pre-treatment effects test (should be zero)
# Exclude the omitted reference period (t=-1), whose coefficient is zero by construction
pre_treatment_effects = event_df[(event_df['event_time'] < 0) & (event_df['event_time'] != -1)]['coefficient']
pre_treatment_test = stats.ttest_1samp(pre_treatment_effects, 0)
print(f"\n=== PRE-TREATMENT EFFECTS TEST ===")
print(f"Mean pre-treatment effect: {pre_treatment_effects.mean():.4f}")
print(f"T-statistic: {pre_treatment_test.statistic:.3f}")
print(f"P-value: {pre_treatment_test.pvalue:.4f}")
if pre_treatment_test.pvalue > 0.05:
print("β
PASS: No significant pre-treatment effects")
else:
print("β οΈ CONCERN: Significant pre-treatment effects detected")
print(" May indicate parallel trends violation")
# Post-treatment effects summary
post_effects = event_df[event_df['event_time'] >= 0]['coefficient']
print(f"\n=== POST-TREATMENT EFFECTS ===")
print(f"Immediate effect (t=0): {event_df[event_df['event_time']==0]['coefficient'].iloc[0]:.3f}")
print(f"Average post-treatment effect: {post_effects.mean():.3f}")
print(f"Maximum effect: {post_effects.max():.3f}")
print(f"Effect persistence: {len(post_effects[post_effects > 0])} of {len(post_effects)} periods positive")
return {
'event_results': event_df,
'model': event_model,
'pre_treatment_test': pre_treatment_test,
'immediate_effect': event_df[event_df['event_time']==0]['coefficient'].iloc[0],
'average_post_effect': post_effects.mean()
}
# Perform event study analysis
event_study_results = event_study_analysis(
df_did, 'date', 'copilot_usage', 'tickets_per_week', 'team_id', treatment_start
)
Robustness Checks and Diagnostics
Multiple Robustness Tests
def did_robustness_checks(df_did, treatment_col, outcome_col, unit_col, time_col):
"""
Comprehensive robustness checks for DiD analysis
"""
robustness_results = {}
print("=== DIFFERENCE-IN-DIFFERENCES ROBUSTNESS CHECKS ===")
# 1. Alternative time periods
print("\n1. ALTERNATIVE TIME WINDOWS")
# Different post-treatment windows
windows = [1, 3, 6] # months
for window in windows:
df_window = df_did[df_did['post_treatment'] == 0].copy()
post_data = df_did[(df_did['post_treatment'] == 1) &
(df_did[time_col] <= df_did[df_did['post_treatment'] == 1][time_col].min() +
pd.DateOffset(months=window))]
df_window = pd.concat([df_window, post_data])
if len(df_window) > 0:
did_formula = f'{outcome_col} ~ {treatment_col} + post_treatment + treated_post'
model = smf.ols(did_formula, data=df_window).fit(cov_type='cluster',
cov_kwds={'groups': df_window[unit_col]})
effect = model.params.get('treated_post', np.nan)
print(f" {window}-month window: {effect:.3f}")
robustness_results[f'window_{window}m'] = effect
# 2. Excluding boundary periods
print("\n2. EXCLUDING BOUNDARY PERIODS")
# Exclude first/last month to check sensitivity
df_trimmed = df_did.copy()
first_date = df_trimmed[time_col].min()
last_date = df_trimmed[time_col].max()
df_trimmed = df_trimmed[
(df_trimmed[time_col] > first_date) &
(df_trimmed[time_col] < last_date)
]
if len(df_trimmed) > 0:
did_formula = f'{outcome_col} ~ {treatment_col} + post_treatment + treated_post'
model_trimmed = smf.ols(did_formula, data=df_trimmed).fit(cov_type='cluster',
cov_kwds={'groups': df_trimmed[unit_col]})
effect_trimmed = model_trimmed.params.get('treated_post', np.nan)
print(f" Excluding boundary periods: {effect_trimmed:.3f}")
robustness_results['trimmed_boundaries'] = effect_trimmed
# 3. Alternative control groups
print("\n3. ALTERNATIVE CONTROL GROUP DEFINITIONS")
# If you have multiple potential control groups, test each
# This example assumes binary treatment, but could be extended
# 4. Placebo tests
print("\n4. PLACEBO TESTS")
# Fake treatment date placed midway through the actual pre-treatment period
df_placebo = df_did[df_did[time_col] < df_did[df_did['post_treatment'] == 1][time_col].min()].copy()
fake_treatment_date = df_placebo[time_col].min() + (df_placebo[time_col].max() - df_placebo[time_col].min()) / 2
if len(df_placebo) > 0:
df_placebo['fake_post'] = (df_placebo[time_col] >= fake_treatment_date).astype(int)
df_placebo['fake_treated_post'] = df_placebo[treatment_col] * df_placebo['fake_post']
placebo_formula = f'{outcome_col} ~ {treatment_col} + fake_post + fake_treated_post'
placebo_model = smf.ols(placebo_formula, data=df_placebo).fit(cov_type='cluster',
cov_kwds={'groups': df_placebo[unit_col]})
placebo_effect = placebo_model.params.get('fake_treated_post', np.nan)
placebo_p = placebo_model.pvalues.get('fake_treated_post', np.nan)
print(f" Placebo test effect: {placebo_effect:.3f} (p={placebo_p:.3f})")
robustness_results['placebo_effect'] = placebo_effect
robustness_results['placebo_p_value'] = placebo_p
if placebo_p > 0.05:
print(" β
PASS: No placebo effect detected")
else:
print(" β οΈ CONCERN: Significant placebo effect")
# 5. Alternative outcome transformations
print("\n5. OUTCOME TRANSFORMATIONS")
if (df_did[outcome_col] > 0).all():
# Log transformation
df_log = df_did.copy()
df_log[f'log_{outcome_col}'] = np.log(df_log[outcome_col])
log_formula = f'log_{outcome_col} ~ {treatment_col} + post_treatment + treated_post'
log_model = smf.ols(log_formula, data=df_log).fit(cov_type='cluster',
cov_kwds={'groups': df_log[unit_col]})
log_effect = log_model.params.get('treated_post', np.nan)
print(f" Log-transformed effect: {log_effect:.3f} ({np.exp(log_effect)-1:.1%} change)")
robustness_results['log_effect'] = log_effect
return robustness_results
# Perform robustness checks
robustness_results = did_robustness_checks(
df_did, 'copilot_usage', 'tickets_per_week', 'team_id', 'date'
)
Sensitivity Analysis
def did_sensitivity_analysis(df_did, treatment_col, outcome_col, unit_col):
"""
Sensitivity analysis for potential confounders
"""
print("=== SENSITIVITY ANALYSIS ===")
# Test sensitivity to different assumptions about unobserved confounders
# Following Imbens (2003) approach
baseline_effect = basic_did_results['did_effect']
baseline_se = basic_did_results['se']
print(f"Baseline DiD effect: {baseline_effect:.3f} Β± {baseline_se:.3f}")
# Simulate different levels of selection bias
selection_bias_levels = [0, 0.1, 0.2, 0.3, 0.4, 0.5]
print(f"\nSensitivity to unobserved confounders:")
print(f"{'Bias Level':<12} {'Adjusted Effect':<15} {'Still Significant?'}")
print("-" * 40)
for bias in selection_bias_levels:
adjusted_effect = baseline_effect - bias
z_stat = adjusted_effect / baseline_se
still_significant = abs(z_stat) > 1.96
sig_status = "Yes" if still_significant else "No"
print(f"{bias:<12.1f} {adjusted_effect:<15.3f} {sig_status}")
# Calculate minimum bias needed to eliminate significance
min_bias_to_null = baseline_effect - 1.96 * baseline_se
print(f"\nMinimum confounding bias to eliminate significance: {min_bias_to_null:.3f}")
if min_bias_to_null > 0.2:
print("β
Result appears robust to moderate confounding")
elif min_bias_to_null > 0.1:
print("β οΈ Result moderately sensitive to confounding")
else:
print("β Result highly sensitive to confounding")
return {
'baseline_effect': baseline_effect,
'min_bias_to_null': min_bias_to_null,
'sensitivity_levels': selection_bias_levels
}
# Perform sensitivity analysis
sensitivity_results = did_sensitivity_analysis(
df_did, 'copilot_usage', 'tickets_per_week', 'team_id'
)
Advanced DiD Specifications
Two-Way Fixed Effects
def two_way_fixed_effects_did(df_did, treatment_col, outcome_col, unit_col, time_col):
"""
Two-way fixed effects DiD specification
Includes:
- Unit fixed effects (control for time-invariant unit characteristics)
- Time fixed effects (control for common time trends)
"""
# Create time period identifiers for fixed effects
df_twfe = df_did.copy()
df_twfe['time_period'] = df_twfe[time_col].dt.to_period('M')
# Two-way fixed effects regression
twfe_formula = f'{outcome_col} ~ treated_post + C({unit_col}) + C(time_period)'
twfe_model = smf.ols(twfe_formula, data=df_twfe).fit(cov_type='cluster',
cov_kwds={'groups': df_twfe[unit_col]})
twfe_effect = twfe_model.params['treated_post']
twfe_se = twfe_model.bse['treated_post']
twfe_ci = twfe_model.conf_int().loc['treated_post']
print(f"=== TWO-WAY FIXED EFFECTS DiD ===")
print(f"Treatment effect: {twfe_effect:.3f}")
print(f"Standard error: {twfe_se:.3f}")
print(f"95% CI: [{twfe_ci[0]:.3f}, {twfe_ci[1]:.3f}]")
print(f"R-squared: {twfe_model.rsquared:.3f}")
# Compare with basic DiD
basic_effect = basic_did_results['did_effect']
difference = twfe_effect - basic_effect
print(f"\nComparison with basic DiD:")
print(f"Basic DiD effect: {basic_effect:.3f}")
print(f"TWFE effect: {twfe_effect:.3f}")
print(f"Difference: {difference:.3f}")
if abs(difference) < 0.1:
print("β
Results consistent between specifications")
else:
print("β οΈ Notable difference between specifications")
print(" Fixed effects may be capturing important variation")
return {
'twfe_effect': twfe_effect,
'twfe_se': twfe_se,
'model': twfe_model,
'comparison_with_basic': difference
}
# Estimate two-way fixed effects model
twfe_results = two_way_fixed_effects_did(
df_did, 'copilot_usage', 'tickets_per_week', 'team_id', 'date'
)
Business Translation Framework
Converting DiD Results to Business Insights
def translate_did_results_to_business(did_results, outcome_col, treatment_description,
outcome_units="units", time_period="month"):
"""
Translate DiD statistical results into business language
"""
effect = did_results['did_effect']
se = did_results['se']
ci = did_results['ci']
p_value = did_results['p_value']
effect_size = did_results['effect_size']
print(f"=== BUSINESS IMPACT TRANSLATION ===")
print(f"Intervention: {treatment_description}")
print(f"Outcome: {outcome_col.replace('_', ' ').title()}")
# Effect direction and magnitude
if effect > 0:
direction = "increase"
impact_verb = "improved"
else:
direction = "decrease"
impact_verb = "reduced"
effect = abs(effect)
print(f"\nπ HEADLINE FINDING:")
print(f"{treatment_description} led to a {effect:.1f} {outcome_units} {direction}")
print(f"in {outcome_col.replace('_', ' ')} per {time_period}")
# Confidence interval interpretation
print(f"\nπ CONFIDENCE RANGE:")
if ci[0] > 0 or ci[1] < 0: # CI doesn't include zero
print(f"We can be 95% confident the true effect is between")
print(f"{ci[0]:.1f} and {ci[1]:.1f} {outcome_units} per {time_period}")
else:
print(f"The effect could range from {ci[0]:.1f} to {ci[1]:.1f} {outcome_units}")
print(f"(includes possibility of no effect)")
# Statistical confidence
print(f"\nπ― STATISTICAL CONFIDENCE:")
if p_value < 0.001:
confidence_level = "very high confidence (p < 0.001)"
elif p_value < 0.01:
confidence_level = "high confidence (p < 0.01)"
elif p_value < 0.05:
confidence_level = "moderate confidence (p < 0.05)"
else:
confidence_level = "low confidence (p β₯ 0.05)"
print(f"Statistical significance: {confidence_level}")
# Effect size interpretation
print(f"\nπ PRACTICAL SIGNIFICANCE:")
if abs(effect_size) < 0.2:
practical_significance = "small practical impact"
elif abs(effect_size) < 0.5:
practical_significance = "moderate practical impact"
else:
practical_significance = "large practical impact"
print(f"Effect size: {practical_significance}")
# Business implications
print(f"\nπΌ BUSINESS IMPLICATIONS:")
if p_value < 0.05 and abs(effect_size) >= 0.2:
print(f"β
RECOMMENDATION: Scale {treatment_description}")
print(f" - Statistically significant with meaningful impact")
print(f" - Expected {direction} of {effect:.1f} {outcome_units} per {time_period}")
# ROI considerations
print(f"\nπ° ROI CONSIDERATIONS:")
print(f" - Calculate implementation costs vs. {effect:.1f} {outcome_units} benefit")
print(f" - Consider scalability across {df_did[unit_col].nunique()} units")
elif p_value < 0.05 and abs(effect_size) < 0.2:
print(f"β οΈ RECOMMENDATION: Consider cost-benefit analysis")
print(f" - Statistically significant but small practical impact")
print(f" - Evaluate if {effect:.1f} {outcome_units} improvement justifies costs")
else:
print(f"β RECOMMENDATION: Do not scale based on current evidence")
print(f" - Insufficient evidence of meaningful impact")
print(f" - Consider longer evaluation period or alternative approaches")
# Implementation guidance
print(f"\nπ IMPLEMENTATION GUIDANCE:")
if effect > 0 and p_value < 0.05:
print(f" - Pilot expansion to similar units")
print(f" - Monitor key metrics during rollout")
print(f" - Expected benefit: {effect:.1f} {outcome_units} per unit per {time_period}")
return {
'headline_effect': f"{effect:.1f} {outcome_units} {direction}",
'confidence_level': confidence_level,
'practical_significance': practical_significance,
'recommendation': 'scale' if (p_value < 0.05 and abs(effect_size) >= 0.2) else 'evaluate' if p_value < 0.05 else 'do_not_scale'
}
# Translate results to business language
business_translation = translate_did_results_to_business(
basic_did_results,
'tickets_per_week',
'GitHub Copilot implementation',
'tickets',
'week'
)
When DiD May Not Work
Diagnostic Checks
def did_diagnostic_summary(trends_test, event_study_results, robustness_results):
"""
Comprehensive diagnostic summary for DiD validity
"""
print("=== DIFFERENCE-IN-DIFFERENCES DIAGNOSTIC SUMMARY ===")
issues = []
warnings = []
# 1. Parallel trends
if not trends_test['trends_valid']:
issues.append("Parallel trends assumption violated")
# 2. Pre-treatment effects in event study
if event_study_results['pre_treatment_test'].pvalue < 0.05:
issues.append("Significant pre-treatment effects detected")
# 3. Placebo test
if 'placebo_p_value' in robustness_results and robustness_results['placebo_p_value'] < 0.05:
issues.append("Placebo test shows significant effect")
# 4. Sensitivity to specification
window_effects = [robustness_results.get(f'window_{w}m') for w in [1, 3, 6]]
window_effects = [e for e in window_effects if not pd.isna(e)]
if len(window_effects) > 1:
window_range = max(window_effects) - min(window_effects)
if window_range > abs(np.mean(window_effects)) * 0.5:
warnings.append("Results sensitive to time window specification")
# Summary
print(f"\nπ DIAGNOSTIC RESULTS:")
if not issues and not warnings:
print("β
ALL DIAGNOSTICS PASSED")
print(" DiD assumptions appear satisfied")
validity_score = "HIGH"
elif issues:
print("β MAJOR ISSUES DETECTED:")
for issue in issues:
print(f" - {issue}")
validity_score = "LOW"
print("\nβ οΈ RECOMMENDATIONS:")
print(" - Consider alternative identification strategies")
print(" - Instrumental variables if available")
print(" - Synthetic control methods")
print(" - Regression discontinuity if applicable")
else:
print("β οΈ MINOR CONCERNS:")
for warning in warnings:
print(f" - {warning}")
validity_score = "MODERATE"
print("\nπ RECOMMENDATIONS:")
print(" - Report sensitivity analyses")
print(" - Consider robustness checks")
print(" - Triangulate with other methods if possible")
print(f"\nπ OVERALL VALIDITY: {validity_score}")
return {
'validity_score': validity_score,
'issues': issues,
'warnings': warnings
}
# Generate comprehensive diagnostic summary
if 'trends_test' in locals() and 'event_study_results' in locals():
diagnostic_summary = did_diagnostic_summary(trends_test, event_study_results, robustness_results)
Difference-in-Differences leverages natural experiments and timing variation to identify causal effects. Always validate the parallel trends assumption and conduct comprehensive robustness checks before making business recommendations.