library(datamations)
library(dplyr)

group_by()

One grouping variable

# Build a datamation for a pipeline that groups small_salary by Work.
datamation_sanddance(
  "small_salary %>%
  group_by(Work)"
)

Two grouping variables

# Build a datamation for a pipeline that groups small_salary by Work and
# Degree.
datamation_sanddance(
  "small_salary %>%
  group_by(Work, Degree)"
)

Three grouping variables

library(palmerpenguins)

# Build a datamation for a pipeline that groups penguins by sex, island and
# species.
datamation_sanddance(
  "penguins %>%
  group_by(sex, island, species)"
)

summarise()

Mean of a variable

# Build a datamation for a pipeline that computes the mean Salary per Work
# group.
datamation_sanddance(
  "small_salary %>%
  group_by(Work) %>%
  summarise(mean_salary = mean(Salary))"
)
#> Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
#> dplyr 1.1.0.
#>  Please use `reframe()` instead.
#>  When switching from `summarise()` to `reframe()`, remember that `reframe()`
#>   always returns an ungrouped data frame and adjust accordingly.
#>  The deprecated feature was likely used in the datamations package.
#>   Please report the issue to the authors.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

Other summary functions

# Median Salary per Work group.
datamation_sanddance(
  "small_salary %>%
  group_by(Work) %>%
  summarise(median_salary = median(Salary))"
)

# 1% quantile (probs = 0.01) of Salary per Degree group.
datamation_sanddance(
  "small_salary %>%
  group_by(Degree) %>%
  summarise(quan = quantile(Salary, probs = 0.01))"
)

# Total Salary per Degree group.
datamation_sanddance(
  "small_salary %>%
  group_by(Degree) %>%
  summarise(sum = sum(Salary))"
)

filter()

Filtering initial data

# Build a datamation for a pipeline that keeps only the rows of small_salary
# with Salary above 90.
datamation_sanddance(
  "small_salary %>%
  filter(Salary > 90)"
)

Filtering within groups

# Build a datamation for a pipeline that, within each Work group, keeps the
# rows whose Salary equals that group's mean Salary.
# NOTE(review): `==` is an exact comparison against the group mean; confirm
# that small_salary actually contains rows that match.
datamation_sanddance(
  "small_salary %>%
  group_by(Work) %>%
  filter(Salary == mean(Salary))"
)

Filtering after summarize

# Build a datamation for a pipeline that summarises the median Salary per
# Work group and then keeps the groups whose median exceeds 90.
datamation_sanddance(
  "small_salary %>%
  group_by(Work) %>%
  summarise(median_salary = median(Salary)) %>%
  filter(median_salary > 90)"
)

count()

# Build a datamation for a pipeline that counts the rows of small_salary per
# Work value.
datamation_sanddance(
  "small_salary %>%
  count(Work)"
)

Binary variables

The jeter_justice data frame provides a basic example of data that exhibits Simpson’s paradox.

# Preview the first six rows of jeter_justice (six is head()'s default).
head(jeter_justice, n = 6L)
#>        player year is_hit
#> 1 Derek Jeter 1995      1
#> 2 Derek Jeter 1995      1
#> 3 Derek Jeter 1995      1
#> 4 Derek Jeter 1995      1
#> 5 Derek Jeter 1995      1
#> 6 Derek Jeter 1995      1

In this datamation, Jeter has a higher batting average than Justice overall.

# Build a datamation for a pipeline that computes, per player, the batting
# average (mean of is_hit) and its standard error.
datamation_sanddance(
  "jeter_justice %>%
  group_by(player) %>%
  summarize(batting_average = mean(is_hit),
            se = sqrt(batting_average * (1 - batting_average) / n()) )"
)



In this datamation, the visual shows that Justice has a higher batting average than Jeter within each year.

# Build a datamation for a pipeline that computes, per player and year, the
# batting average (mean of is_hit) and its standard error.
datamation_sanddance(
  "jeter_justice %>%
  group_by(player, year) %>%
  summarize(batting_average = mean(is_hit),
            se = sqrt(batting_average * (1 - batting_average) / n()) )"
)