library(tidyverse)
library(wordbankr)
library(here)
# Animal-sound words to look up; matched against the item `definition` column.
my_sounds <- c("meow", "woof woof", "cockadoodledoo")

# Fetch the item table for the "English (American)" "WG" form and keep only
# the rows whose definition is one of the sounds listed above.
sounds <- filter(
  get_item_data(language = "English (American)", form = "WG"),
  definition %in% my_sounds
)
# Download the responses for the selected items, requesting administration
# and item information alongside them (`administrations`, `iteminfo`).
sounds_data <- get_instrument_data(
  language = "English (American)",
  form = "WG",
  items = sounds$item_id,
  administrations = TRUE,
  iteminfo = TRUE
) %>%
  mutate(
    produces = value == "produces",
    # A word counts as understood when it is marked either "understands" or
    # "produces". %in% never returns NA, so a missing `value` gives FALSE.
    understands = value %in% c("produces", "understands")
  ) %>%
  # `produces` is NA exactly when `value` is NA, so this drops those rows.
  drop_na(produces) %>%
  rename(sound = uni_lemma)
# What proportion of kids at each age understand/produce each word?
# One output row per age x sound combination:
#   kids_produce    - number of responses flagged as produced
#   kids_understand - number of responses flagged as understood
#   kids_respond    - number of distinct `data_id` values in the group
#   prop_*          - the two counts divided by kids_respond
sounds_summary <- sounds_data %>%
  group_by(age, sound) %>%
  summarise(
    kids_produce = sum(produces, na.rm = TRUE),
    kids_understand = sum(understands, na.rm = TRUE),
    kids_respond = n_distinct(data_id),
    prop_produce = kids_produce / kids_respond,
    prop_understand = kids_understand / kids_respond
  ) %>%
  # summarise() only removes the last grouping variable, so the result would
  # otherwise stay grouped by age; drop the grouping to get a plain tibble.
  ungroup()
# Now let's export both data frames for the lab.
# Write each table to a CSV file in the "data" directory, with paths built by
# here::here(). The list names are the output file names.
iwalk(
  list(
    "animal_sounds_data.csv" = sounds_data,
    "animal_sounds_summary.csv" = sounds_summary
  ),
  function(tbl, file_name) write_csv(tbl, here::here("data", file_name))
)