From afd8d644a780533a967693f54af3911a931526f7 Mon Sep 17 00:00:00 2001 From: Nick Lucius Date: Fri, 5 May 2017 22:34:31 -0500 Subject: [PATCH] download non-tabular datasets with export.socrata #126 --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ R/RSocrata.R | 43 ++++++++++++++++++++++++++++++++----------- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 32fdfc4..0c38881 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ Description: Provides easier interaction with format and manages throttling by 'Socrata'. Users can upload data to Socrata portals directly from R. -Version: 1.8.0-1 +Version: 1.8.0-2 Date: 2017-05-05 Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc Maintainer: "Tom Schenk Jr." diff --git a/NAMESPACE b/NAMESPACE index a9900d0..82595fd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,6 @@ # Generated by roxygen2: do not edit by hand +export(export.socrata) export(fieldName) export(isFourByFour) export(ls.socrata) @@ -17,3 +18,4 @@ importFrom(jsonlite,fromJSON) importFrom(mime,guess_type) importFrom(plyr,rbind.fill) importFrom(utils,read.csv) +importFrom(utils,write.csv) diff --git a/R/RSocrata.R b/R/RSocrata.R index 5b35358..2ec8840 100644 --- a/R/RSocrata.R +++ b/R/RSocrata.R @@ -469,6 +469,8 @@ write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email, #' @param url - the base URL of a domain (e.g., "data.cityofchicago.org") #' @return a Gzipped file with the four-by-four and timestamp of when the download began in filename #' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org} +#' @importFrom httr GET +#' @importFrom utils write.csv #' @export export.socrata <- function(url) { dir.create(basename(url), showWarnings = FALSE) # Create directory based on URL @@ -480,16 +482,35 @@ export.socrata <- function(url) { # Download data downloadUrl <- ls$distribution[[i]]$downloadURL[1] # Currently grabs CSV, which is the first element - d <- read.socrata(downloadUrl) 
- - # Construct the filename output - downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore - downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename - filename <- httr::parse_url(ls$identifier[i]) - filename$path <- substr(filename$path, 11, 19) - filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format, ".gz") - - # Write file - write.csv(d, file = gzfile(filename)) + if (grepl("\\.csv", downloadUrl)) { + d <- read.socrata(downloadUrl) + + # Construct the filename output + default_format <- "csv" + downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore + downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename + filename <- httr::parse_url(ls$identifier[i]) + filename$path <- substr(filename$path, 11, 19) + filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format, ".gz") + + # Write file + write.csv(d, file = gzfile(filename)) + + } else { + response <- GET(downloadUrl) + + # Construct the filename output + default_format <- response$headers$`content-disposition` + default_format <- strsplit(default_format, "filename=")[[1]][2] + downloadTimeChr <- gsub('\\s+','_',downloadTime) # Remove spaces and replaces with underscore + downloadTimeChr <- gsub(':', '', downloadTimeChr) # Removes colon from timestamp to be valid filename + filename <- httr::parse_url(ls$identifier[i]) + filename$path <- substr(filename$path, 11, 19) + filename <- paste0(filename$hostname, "/", filename$path, "_", downloadTimeChr, ".", default_format) + + # Write file + writeBin(response$content, filename) + } + } } \ No newline at end of file