Skip to content

Commit

Permalink
Rebase dev based on hot fix from master
Browse files Browse the repository at this point in the history
Merge branch 'master' into dev

# Conflicts:
#	DESCRIPTION
  • Loading branch information
tomschenkjr committed Jan 16, 2018
2 parents c338e0f + d25acb2 commit d825f7e
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 34 deletions.
10 changes: 5 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@ Package: RSocrata
Type: Package
Title: Download or Upload 'Socrata' Data Sets
Description: Provides easier interaction with
Socrata open data portals <http://dev.socrata.com>.
'Socrata' open data portals <http://dev.socrata.com>.
Users can provide a 'Socrata' data set resource URL,
or a 'Socrata' Open Data API (SoDA) web query,
or a 'Socrata' "human-friendly" URL,
returns an R data frame. Converts dates to 'POSIX'
format and manages throttling by 'Socrata'.
Users can upload data to Socrata portals directly
Users can upload data to 'Socrata' portals directly
from R.
Version: 1.8.0-5
Date: 2017-06-23
Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., and John Malc
Version: 1.8.0-8
Date: 2018-01-15
Author: Hugh Devlin, Ph. D., Tom Schenk, Jr., Gene Leynes, Nick Lucius, John Malc, Mark Silverberg, and Peter Schmeideskamp
Maintainer: "Tom Schenk Jr." <developers@cityofchicago.org>
Depends:
R (>= 3.3.0)
Expand Down
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,18 @@ export(read.socrata)
export(validateUrl)
export(write.socrata)
importFrom(httr,GET)
importFrom(httr,POST)
importFrom(httr,PUT)
importFrom(httr,add_headers)
importFrom(httr,authenticate)
importFrom(httr,build_url)
importFrom(httr,content)
importFrom(httr,http_status)
importFrom(httr,parse_url)
importFrom(httr,stop_for_status)
importFrom(httr,user_agent)
importFrom(jsonlite,fromJSON)
importFrom(mime,guess_type)
importFrom(plyr,rbind.fill)
importFrom(utils,packageVersion)
importFrom(utils,read.csv)
59 changes: 53 additions & 6 deletions R/RSocrata.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,38 @@ logMsg <- function(s) {
cat(format(Sys.time(), "%Y-%m-%d %H:%M:%OS3 "), as.character(sys.call(-1))[1], ": ", s, '\n', sep='')
}

#' Compiles the information to be used in HTTP headers
#'
#' Grabs the headers (RSocrata version, OS, and R version) to be used while
#' making HTTP requests with Socrata. This enables Socrata's team to track
#' the usage of RSocrata.
#' @return a string
#' @importFrom utils packageVersion
#' @author Tom Schenk Jr \email{tom.schenk@@cityofchicago.org}
#' @noRd
fetch_user_agent <- function() {
rSocrataVersion <- packageVersion("RSocrata")
operatingSystem <- Sys.info()[["sysname"]]
operatingSystemVersion <- paste(Sys.info()[["release"]], Sys.info()[["version"]])
rVersion <- paste0(R.version$major,
".",
R.version$minor,
ifelse( # Checks if version has status, e.g., "rev"
R.version$status == "",
"",
paste0("-",R.version$status))
)

header <- paste0( "RSocrata/",
rSocrataVersion, " (",
operatingSystem, "/",
operatingSystemVersion, "; ",
"R/", rVersion,
")"
)
return(header)
}

#' Checks the validity of the syntax for a potential Socrata dataset Unique Identifier, also known as a 4x4.
#'
#' Will check the validity of a potential dataset unique identifier
Expand Down Expand Up @@ -79,6 +111,16 @@ validateUrl <- function(url, app_token) {
parsedUrl$query <- NULL
}

# if /data appended to URL, remove it
pathLength <- nchar(parsedUrl$path)
if(substr(parsedUrl$path, pathLength - 4, pathLength) == '/data') {
parsedUrl$path <- substr(parsedUrl$path, 1, pathLength - 5)
}
if(substr(parsedUrl$path, pathLength - 5, pathLength) == '/data/') {
parsedUrl$path <- substr(parsedUrl$path, 1, pathLength - 6)
}


fourByFour <- basename(parsedUrl$path)
if(!isFourByFour(fourByFour))
stop(fourByFour, " is not a valid Socrata dataset unique identifier.")
Expand Down Expand Up @@ -162,15 +204,15 @@ no_deniro <- function(x) {
#' @param email - Optional. The email to the Socrata account with read access to the dataset.
#' @param password - Optional. The password associated with the email to the Socrata account
#' @return httr response object
#' @importFrom httr http_status GET content stop_for_status
#' @importFrom httr http_status GET content stop_for_status user_agent
#' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org}
#' @noRd
getResponse <- function(url, email = NULL, password = NULL) {

if(is.null(email) && is.null(password)){
response <- httr::GET(url)
response <- httr::GET(url, httr::user_agent(fetch_user_agent()))
} else { # email and password are not NULL
response <- httr::GET(url, httr::authenticate(email, password))
response <- httr::GET(url, httr::authenticate(email, password), httr::user_agent(fetch_user_agent()))
}

# status <- httr::http_status(response)
Expand Down Expand Up @@ -355,15 +397,18 @@ read.socrata <- function(url, app_token = NULL, email = NULL, password = NULL,
#' # Check schema definition for metadata
#' attributes(df)
#' @importFrom jsonlite fromJSON
#' @importFrom httr parse_url
#' @importFrom httr GET build_url parse_url content user_agent
#' @export
ls.socrata <- function(url) {
url <- as.character(url)
parsedUrl <- httr::parse_url(url)
if(is.null(parsedUrl$scheme) | is.null(parsedUrl$hostname))
stop(url, " does not appear to be a valid URL.")
parsedUrl$path <- "data.json"
data_dot_json <- jsonlite::fromJSON(httr::build_url(parsedUrl))
#Download data
response <- httr::GET(httr::build_url(parsedUrl), httr::user_agent(fetch_user_agent()))
data_dot_json <- jsonlite::fromJSON(content(response, "text"))

data_df <- as.data.frame(data_dot_json$dataset)
# Assign Catalog Fields as attributes
attr(data_df, "@context") <- data_dot_json$`@context`
Expand All @@ -389,20 +434,22 @@ ls.socrata <- function(url) {
#' @param password - password associated with Socrata account (will need write access to dataset)
#' @param app_token - optional app_token associated with Socrata account
#' @return httr a response object
#' @importFrom httr GET
#' @importFrom httr GET POST PUT authenticate user_agent add_headers
#'
#' @noRd
checkUpdateResponse <- function(json_data_to_upload, url, http_verb, email, password, app_token = NULL) {
if(http_verb == "POST"){
response <- httr::POST(url,
body = json_data_to_upload,
httr::authenticate(email, password),
httr::user_agent(fetch_user_agent()),
httr::add_headers("X-App-Token" = app_token,
"Content-Type" = "application/json")) #, verbose())
} else if(http_verb == "PUT"){
response <- httr::PUT(url,
body = json_data_to_upload,
httr::authenticate(email, password),
httr::user_agent(fetch_user_agent()),
httr::add_headers("X-App-Token" = app_token,
"Content-Type" = "application/json")) # , verbose())
}
Expand Down
4 changes: 3 additions & 1 deletion man/ls.socrata.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

50 changes: 28 additions & 22 deletions tests/testthat/test-all.R
Original file line number Diff line number Diff line change
Expand Up @@ -234,13 +234,16 @@ test_that("Read URL provided by data.json from ls.socrata() - JSON", {
expect_equal(9, ncol(df), label="columns")
})

test_that("Read data with missing dates", { # See issue #24 & #27
# Query below will pull Boston's 311 requests from early July 2011. Contains NA dates.
df <- read.socrata("https://data.cityofboston.gov/resource/awu8-dc52.csv?$where=case_enquiry_id< 101000295717")
expect_equal(99, nrow(df), label="rows")
na_time_rows <- df[is.na(df$TARGET_DT), ]
expect_equal(33, length(na_time_rows), label="rows with missing TARGET_DT dates")
})
# This test is commented out because of issue #137 as a temporary work-around.
# Test should be re-enabled in the future with a work-around.
#
# test_that("Read data with missing dates", { # See issue #24 & #27
# # Query below will pull Boston's 311 requests from early July 2011. Contains NA dates.
# df <- read.socrata("https://data.cityofboston.gov/resource/awu8-dc52.csv?$where=case_enquiry_id< 101000295717")
# expect_equal(99, nrow(df), label="rows")
# na_time_rows <- df[is.na(df$TARGET_DT), ]
# expect_equal(33, length(na_time_rows), label="rows with missing TARGET_DT dates")
# })

test_that("format is not supported", {
# Unsupported data formats
Expand Down Expand Up @@ -419,6 +422,17 @@ test_that("incorrect API Query Human Readable", {
expect_equal(9, ncol(df), label="columns")
})

context("URL suffixes from Socrata are handled")

test_that("Handle /data suffix", {
df1 <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj/data')
expect_equal(1007, nrow(df1), label="rows")
expect_equal(9, ncol(df1), label="columns")
df2 <- read.socrata('https://soda.demo.socrata.com/dataset/USGS-Earthquake-Reports/4334-bgaj/data/')
expect_equal(1007, nrow(df2), label="rows")
expect_equal(9, ncol(df2), label="columns")
})

context("ls.socrata functions correctly")

test_that("List datasets available from a Socrata domain", {
Expand Down Expand Up @@ -478,14 +492,11 @@ test_that("add a row to a dataset", {
df_in <- data.frame(x,y)

# write to dataset
write.socrata(df_in,datasetToAddToUrl,"UPSERT",socrataEmail,socrataPassword)

# read from dataset and store last (most recent) row for comparisons / tests
df_out <- read.socrata(url = datasetToAddToUrl, email = socrataEmail, password = socrataPassword)
df_out_last_row <- tail(df_out, n=1)
res <- write.socrata(df_in,datasetToAddToUrl,"UPSERT",socrataEmail,socrataPassword)

# Check that the dataset was written without error
expect_equal(res$status_code, 200L)

expect_equal(df_in$x, as.numeric(df_out_last_row$x), label = "x value")
expect_equal(df_in$y, as.numeric(df_out_last_row$y), label = "y value")
})


Expand All @@ -498,15 +509,10 @@ test_that("fully replace a dataset", {
df_in <- data.frame(x,y)

# write to dataset
write.socrata(df_in,datasetToReplaceUrl,"REPLACE",socrataEmail,socrataPassword)

# read from dataset for comparisons / tests
df_out <- read.socrata(url = datasetToReplaceUrl, email = socrataEmail, password = socrataPassword)
res <- write.socrata(df_in,datasetToReplaceUrl,"REPLACE",socrataEmail,socrataPassword)

expect_equal(ncol(df_in), ncol(df_out), label="columns")
expect_equal(nrow(df_in), nrow(df_out), label="rows")
expect_equal(df_in$x, as.numeric(df_out$x), label = "x values")
expect_equal(df_in$y, as.numeric(df_out$y), label = "y values")
# Check that the dataset was written without error
expect_equal(res$status_code, 200L)
})


Expand Down

0 comments on commit d825f7e

Please sign in to comment.