From 4efc59258336d8df47d2f97904ca524b518746c1 Mon Sep 17 00:00:00 2001
From: Tom Schenk Jr
Date: Fri, 5 May 2017 17:32:29 -0500
Subject: [PATCH] Included alpha code for export.socrata

---
 DESCRIPTION  |  4 ++--
 R/RSocrata.R | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 30a9898..32fdfc4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -10,8 +10,8 @@ Description: Provides easier interaction with
     format and manages throttling by 'Socrata'.
     Users can upload data to Socrata portals directly from R.
-Version: 1.7.2-12
-Date: 2017-03-16
+Version: 1.8.0-1
+Date: 2017-05-05
 Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc
 Maintainer: "Tom Schenk Jr."
 Depends:
diff --git a/R/RSocrata.R b/R/RSocrata.R
index d0542e3..5b35358 100644
--- a/R/RSocrata.R
+++ b/R/RSocrata.R
@@ -458,3 +458,38 @@ write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email,
   return(response)
 }
 
+
+#' Exports CSVs from Socrata data portals
+#'
+#' Takes the base URL of a data portal (e.g., "data.cityofchicago.org") and
+#' downloads the CSV export of every dataset (no other file formats are
+#' supported) into a single directory named after the root URL (e.g., "data.cityofchicago.org/").
+#' Each file is gzip-compressed and the download start time is embedded in its
+#' filename. No data is kept in the R workspace.
+#' @param url the base URL of a Socrata domain (e.g., "data.cityofchicago.org")
+#' @return one gzipped CSV file per dataset, named with the dataset's four-by-four identifier and the timestamp at which the download began
+#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org}
+#' @export
+export.socrata <- function(url) {
+  dir.create(basename(url), showWarnings = FALSE) # Create output directory named after the portal URL
+  ls <- ls.socrata(url = url)
+  for (i in seq_len(nrow(ls))) {
+    # Capture the timestamp (and timezone) before the download begins
+    downloadTime <- Sys.time()
+    downloadTz <- Sys.timezone()
+
+    # Download data
+    downloadUrl <- ls$distribution[[i]]$downloadURL[1] # Currently grabs the CSV, which is the first element
+    d <- read.socrata(downloadUrl)
+
+    # Construct the output filename
+    downloadTimeChr <- gsub('\\s+', '_', downloadTime) # Replace spaces with underscores
+    downloadTimeChr <- gsub(':', '', downloadTimeChr)  # Drop colons so the timestamp is a valid filename
+    filename <- httr::parse_url(ls$identifier[i])
+    filename$path <- substr(filename$path, 11, 19) # Extract the dataset's four-by-four identifier
+    filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".csv.gz")
+
+    # Write the gzipped CSV
+    write.csv(d, file = gzfile(filename))
+  }
+}
\ No newline at end of file
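
A minimal usage sketch of the new function (not part of the patch). It assumes the patched RSocrata is installed, that the portal is reachable, and that ls.socrata() accepts the full URL shown here; the output directory, dataset identifier, and timestamp in the comments are illustrative.

library(RSocrata)

# Download the CSV export of every dataset on the portal; files are written to
# a local "data.cityofchicago.org/" directory as gzipped, timestamped CSVs.
export.socrata("https://data.cityofchicago.org")

# Inspect what was written; filenames look like (illustrative):
#   "xzkq-xp2w_2017-05-05_173229.csv.gz"
list.files("data.cityofchicago.org")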