This week's data is from the Department of Education courtesy of Alex Albright.
Data idea comes from Dignity and Debt who is running a contest around data viz for understanding and spreading awareness around Student Loan debt.
There are already some gorgeous plots in the style of DuBois.
I have uploaded the raw data and the clean data - definitely a nice dive into some data cleansing if you want to have a go at the raw Excel files.
loans <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2019/2019-11-26/loans.csv")
# Or read in with tidytuesdayR package (https://github.com/dslc-io/tidytuesdayR)
# Either ISO-8601 date or year/week works!
# Install via pak::pak("dslc-io/tidytuesdayR")
tuesdata <- tidytuesdayR::tt_load("2019-11-26")
tuesdata <- tidytuesdayR::tt_load(2019, week = 48)
loans <- tuesdata$loans
variable | class | description |
---|---|---|
agency_name | character | Name of loan agency |
year | integer | two digits year |
quarter | integer | Quarter (3 month period) |
starting | double | Total value in dollars at start of quarter |
added | double | Total value added during quarter |
total | double | Total dollars repaid |
consolidation | double | Consolidation reflects the dollar value of loans consolidated |
rehabilitation | double | Rehabilitation reflects the dollar value of loans rehabilitated |
voluntary_payments | double | Voluntary payments reflects the total amount of payments received from borrowers |
wage_garnishments | double | Wage Garnishments reflect the total amount of wage garnishment payments |
library(tidyverse)
library(here)
library(readxl)
short_file_names <- list.files(here("2019", "2019-11-26")) %>%
str_remove_all(".xls|.xlsx") %>%
str_remove_all("PCA_Report_|pca-report-") %>%
tolower() %>%
str_replace("default_recoveries_pca", "fy15q4")
short_files <- short_file_names %>%
str_detect("loans") %>%
discard(short_file_names, .)
all_files <- list.files(here("2019", "2019-11-26")) %>%
map_chr(~ paste0("2019/2019-11-26/", .x))
files <- all_files %>%
str_detect("loans") %>%
discard(all_files, .)
names_clean <- files[[2]] %>%
read_excel(skip = 4) %>%
janitor::clean_names() %>%
names() %>%
str_replace("at_start_of_quarter", "starting")
list_df <- map(.x = files, .f = read_excel, skip = 4) %>%
setNames(short_files) %>%
modify_at(.at = "fy15q4", .f = select,
`Agency Name`:Added, Total, everything())
all_df <- map(.x = files, .f = read_excel, skip = 4) %>%
setNames(short_files) %>%
modify_at(.at = "fy15q4", .f = select,
`Agency Name`:Added, Total, everything()) %>%
map(set_names, nm = names_clean) %>%
map(~ filter(.x, !is.na(starting))) %>%
map(~ filter(.x, starting != "At Start of Quarter")) %>%
map2(.x = ., .y = names(.), ~ mutate(.x, quarter = .y)) %>%
map(~ select(.x, agency_name, quarter, starting, added, total, consolidation,
rehabilitation, voluntary_payments, wage_garnishments)) %>%
map(~ mutate_at(.x,
.vars = vars(starting:wage_garnishments),
.funs = as.double
))
clean_df <- all_df %>%
reduce(.f = bind_rows) %>%
mutate(quarter = str_remove(quarter, "fy")) %>%
separate(col = quarter,
into = c("year", "quarter"),
sep = "q") %>%
mutate(quarter = str_remove(quarter, "x")) %>%
mutate(quarter = as.integer(quarter),
year = as.integer(year)) %>%
filter(agency_name != "Total") %>%
na_if(0)
clean_df %>%
View()
clean_df %>%
write_csv(here("2019", "2019-11-26", "loans.csv"))