From 4efc59258336d8df47d2f97904ca524b518746c1 Mon Sep 17 00:00:00 2001
From: Tom Schenk Jr
Date: Fri, 5 May 2017 17:32:29 -0500
Subject: [PATCH] Included alpha code for export.socrata

---
 DESCRIPTION  |  4 ++--
 R/RSocrata.R | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 30a9898..32fdfc4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -10,8 +10,8 @@ Description: Provides easier interaction with
     format and manages throttling by 'Socrata'.
     Users can upload data to Socrata portals directly from R.
-Version: 1.7.2-12
-Date: 2017-03-16
+Version: 1.8.0-1
+Date: 2017-05-05
 Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc
 Maintainer: "Tom Schenk Jr."
 Depends:
diff --git a/R/RSocrata.R b/R/RSocrata.R
index d0542e3..5b35358 100644
--- a/R/RSocrata.R
+++ b/R/RSocrata.R
@@ -458,3 +458,38 @@ write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email,
   return(response)
 }
 
+
+#' Exports CSVs from Socrata data portals
+#'
+#' Takes the base URL of a data portal (e.g., "data.cityofchicago.org") and
+#' downloads the CSV export of every dataset (no other file formats are
+#' supported) into a single directory named after the root URL (e.g., "data.cityofchicago.org/").
+#' Each file is gzip-compressed and the download start time is embedded in its
+#' filename. No data is kept in the R workspace.
+#' @param url the base URL of a Socrata domain (e.g., "data.cityofchicago.org")
+#' @return one gzipped CSV file per dataset, named with the dataset's four-by-four identifier and the timestamp at which the download began
+#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org}
+#' @export
+export.socrata <- function(url) {
+  dir.create(basename(url), showWarnings = FALSE) # Create output directory named after the portal URL
+  ls <- ls.socrata(url = url)
+  for (i in seq_len(nrow(ls))) {
+    # Capture the timestamp (and timezone) before the download begins
+    downloadTime <- Sys.time()
+    downloadTz <- Sys.timezone()
+
+    # Download data
+    downloadUrl <- ls$distribution[[i]]$downloadURL[1] # Currently grabs the CSV, which is the first element
+    d <- read.socrata(downloadUrl)
+
+    # Construct the output filename
+    downloadTimeChr <- gsub('\\s+', '_', downloadTime) # Replace spaces with underscores
+    downloadTimeChr <- gsub(':', '', downloadTimeChr)  # Drop colons so the timestamp is a valid filename
+    filename <- httr::parse_url(ls$identifier[i])
+    filename$path <- substr(filename$path, 11, 19) # Extract the dataset's four-by-four identifier
+    filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".csv.gz")
+
+    # Write the gzipped CSV
+    write.csv(d, file = gzfile(filename))
+  }
+}
\ No newline at end of file
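
A minimal usage sketch of the new function (not part of the patch). It assumes the patched RSocrata is installed, that the portal is reachable, and that ls.socrata() accepts the full URL shown here; the output directory, dataset identifier, and timestamp in the comments are illustrative.

library(RSocrata)

# Download the CSV export of every dataset on the portal; files are written to
# a local "data.cityofchicago.org/" directory as gzipped, timestamped CSVs.
export.socrata("https://data.cityofchicago.org")

# Inspect what was written; filenames look like (illustrative):
#   "xzkq-xp2w_2017-05-05_173229.csv.gz"
list.files("data.cityofchicago.org")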