library(tidyverse)
moma <- read_csv("http://bit.ly/cs631-moma")

moma_singles_summary <- moma %>% 
  filter(num_artists == 1) %>% 
  mutate(lag_acquired = year_acquired - year_created,
         artist_gender = as.factor(artist_gender)) %>% 
  group_by(year_acquired, artist_gender) %>% 
  summarize(n_acquired = n(),
            median_lag = median(lag_acquired)) %>% 
  complete(year_acquired, artist_gender, 
           fill = list(n_acquired = 0))

ggplot(moma_singles_summary, aes(x = year_acquired, 
                                 y = n_acquired,
                                 color = artist_gender)) +
  geom_point()

ggplot(moma_singles_summary, aes(x = year_acquired, 
                                 y = median_lag,
                                 color = artist_gender,
                                 group = artist_gender)) +
  geom_point() +
  geom_line()

Tag top artists

top_artists <- moma %>% 
  count(artist_gender, artist, sort = TRUE) %>% 
  group_by(artist_gender) %>% 
  top_n(1) %>% 
  filter(!is.na(artist_gender)) %>% 
  pull(artist)

moma <- moma %>% 
  mutate(top_to_color = case_when(
    artist %in% top_artists & artist_gender == "Male" ~ "Picasso",
    artist %in% top_artists & artist_gender == "Female" ~ "Levine"
  ))

moma_top <- moma %>% 
  filter(!is.na(top_to_color))

moma_else <- moma %>% 
  filter(is.na(top_to_color))

# need to separate plotting of "NA" points
# plot Levine and Picasso points with separate geom layer
ggplot(data = NULL, aes(year_created, year_acquired)) +
  geom_point(data = moma_else, color = "grey70", alpha = .5) +
  geom_point(data = moma_top, aes(color = top_to_color), size = 2) +
  labs(x = "Year Painted", y = "Year Acquired") +
  ggtitle("MoMA Keeps Its Collection Current") 

Creative Commons License