Skip to content

Commit

Permalink
Merge pull request #32 from jread-usgs/master
Browse files Browse the repository at this point in the history
issues from code review
  • Loading branch information
Jordan S Read committed Sep 10, 2015
2 parents 700f147 + 1462961 commit 4d46554
Show file tree
Hide file tree
Showing 12 changed files with 68 additions and 141 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@ Copyright: This software is in the public domain because it contains materials
official USGS copyright policy at
http://www.usgs.gov/visual-id/credit_usgs.html#copyright
Imports:
yaml
yaml,
dplyr,
lazyeval
Suggests:
testthat
LazyLoad: yes
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,14 @@ S3method(window,sensor)
export(MAD)
export(flag)
export(flag.data.frame)
export(persist)
export(read)
export(read.default)
export(sensor)
importFrom(dplyr,"%>%")
importFrom(dplyr,group_by_)
importFrom(dplyr,mutate_)
importFrom(lazyeval,as.lazy)
importFrom(stats,window)
importFrom(tools,file_ext)
importFrom(yaml,yaml.load_file)
20 changes: 8 additions & 12 deletions R/custom-functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,8 @@ MAD.values <- function(vals, b = 1.4826){
MAD.windowed <- function(vals, windows){
stopifnot(length(vals) == length(windows))

# what is the underlying distribution? (important for assigning "b")

MAD.out <- vector(length=length(vals))
un.win <- unique(windows)

for (i in 1:length(un.win)){
win.i <- un.win[i]
val.i <- windows == win.i
MAD.out[val.i] = MAD.values(vals[val.i])
}
return(MAD.out)
mad <- group_by_(data.frame(x=vals,w=windows), 'w') %>% mutate_(mad='sensorQC:::MAD.values(x)') %>% .$mad
return(mad)
}
#'@title median absolute deviation outlier test
#'@name MAD
Expand All @@ -37,6 +28,7 @@ MAD.windowed <- function(vals, windows){
#'@param windows vector of equal length to x specifying windows
#'@return a vector of MAD normalized values relative to an undefined rejection criteria (usually 2.5 or 3).
#'@keywords MAD
#'@importFrom dplyr group_by_ mutate_ %>%
#'@author
#'Jordan S. Read
#'@export
Expand All @@ -49,7 +41,11 @@ MAD <- function(x, w){

}


#' @export
persist <- function(x){
tmp <- rle(x)
rep(tmp$lengths,times = tmp$lengths)
}

call.cv <- function(data.in){
CV <- 100*sd(data.in)/mean(data.in)
Expand Down
26 changes: 0 additions & 26 deletions R/expression-helpers.R

This file was deleted.

46 changes: 7 additions & 39 deletions R/flag_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,18 @@ calc_flags <- function(x, ...){
UseMethod('calc_flags')
}

#' @importFrom dplyr mutate_
#' @importFrom lazyeval as.lazy
#' @export
calc_flags.sensor <- function(sensor, expr, which.flagged=TRUE){
flags <- sqc(expr=expr, vals=values(sensor), windows=windows(sensor))

flags <- mutate_(sensor$sensor[-1], flags = lazyeval::as.lazy(expr, globalenv()))$flags

if (!inherits(flags, 'logical'))
stop(expr,' failed to generate booleans')
#check for class of flags
if (which.flagged)
return(which(flags))
else
return(flags)
}


sqc <- function(expr, vals, windows, ...){

expr = tryCatch({
parse(text = expr)
}, error = function(e) {
stop(paste0('error evaluation expression ',expr))
})

vals = set.args(expr, vals, windows)

flags <- eval(expr, envir=vals)

return(flags & is.finite(flags) & !is.na(flags))
}

set.args <- function(expr, vals, windows){
val.call <- function(x){
do.call(paste0('to.',x), list(vals=vals, windows=windows))
}
arg.names = expr_var(expr)
args = sapply(arg.names, val.call)
return(setNames(args, arg.names))
}

to.n <- function(vals, ...){
tmp <- rle(vals)
list('n'=rep(tmp$lengths,times = tmp$lengths))
}

to.x <- function(vals, ...){
list('x'=vals)
}

to.w <- function(..., windows){
list('w'=windows)
}
2 changes: 1 addition & 1 deletion R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ read.wide_burst <- function(file,date.format){
}
date.vec <- head(date.vec,cnt-1)
sens.vec <- head(sens.vec,cnt-1)
data.out <- data.frame('DateTime'=date.vec, 'sensor.obs'=sens.vec)
data.out <- data.frame('DateTime'=date.vec, 'x'=sens.vec)

# should we also return metadata?
return(data.out)
Expand Down
1 change: 1 addition & 0 deletions R/sensor-class.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ sensor <- function(x, flag.defs, ...){

#' @export
sensor.data.frame <- function(x, flag.defs = NULL, ...){
names(x) <- c('times','x','w')[seq_len(length(names(x)))]
sensor = list(sensor=x)
flags = define_flags(flag.defs,...)
if (!is.null(flags))
Expand Down
6 changes: 3 additions & 3 deletions R/window.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ auto.chunk.time <- function(data.in){

# finds natural breaks in time sequence of data
data.in = as.data.frame(data.in[1:2])
t.steps <- as.numeric(diff(data.in$DateTime))
t.steps <- as.numeric(diff(data.in$times))
###### re-write this!!
MAD.norm <- MAD.values(t.steps) # deal with NAs?
break.i <- MAD.norm > 2.5
Expand All @@ -48,10 +48,10 @@ auto.chunk.time <- function(data.in){
}
}

block.df <- data.frame("windows"=block.int)
block.df <- data.frame("w"=block.int)
windowed.data <- cbind(data.in,block.df)

windowed.data[['windows']][j+1]=blck.i
windowed.data[['w']][j+1]=blck.i

return(windowed.data)
}
Expand Down
4 changes: 2 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ High-frequency aquatic sensor QAQC procedures. `sensorQC` imports data, and runs
library(sensorQC)
file <- system.file('extdata', 'test_data.txt', package = 'sensorQC')
sensor <- read(file, format="wide_burst", date.format="%m/%d/%Y %H:%M")
flag(sensor, 'x == 999999', 'n > 3', 'is.na(x)')
flag(sensor, 'x == 999999', 'persist(x) > 3', 'is.na(x)')
```

Use the `MAD` (median absolute deviation) test, and add `w` to the function call to specify "windows" (note, sensor must be windowed w/ `window()` prior to using `w`)
```{r}
sensor = window(sensor, 'auto')
flag(sensor, 'x == 999999', 'n > 3', 'MAD(x,w) > 3', 'MAD(x) > 3')
flag(sensor, 'x == 999999', 'persist(x) > 3', 'MAD(x,w) > 3', 'MAD(x) > 3')
```
72 changes: 36 additions & 36 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,57 +36,57 @@ sensor <- read(file, format="wide_burst", date.format="%m/%d/%Y %H:%M")
## number of observations:5100

``` r
flag(sensor, 'x == 999999', 'n > 3', 'is.na(x)')
flag(sensor, 'x == 999999', 'persist(x) > 3', 'is.na(x)')
```

## object of class "sensor"
## DateTime sensor.obs
## 1 2013-11-01 00:00:00 48.86
## 2 2013-11-01 00:00:01 49.04
## 3 2013-11-01 00:00:02 49.50
## 4 2013-11-01 00:00:03 48.91
## 5 2013-11-01 00:00:04 48.90
## 6 2013-11-01 00:00:05 48.96
## 7 2013-11-01 00:00:06 48.48
## 8 2013-11-01 00:00:07 48.97
## 9 2013-11-01 00:00:08 48.97
## 10 2013-11-01 00:00:09 48.99
## 11 2013-11-01 00:00:10 48.35
## 12 2013-11-01 00:00:11 48.51
## 13 2013-11-01 00:00:12 49.25
## 14 2013-11-01 00:00:13 48.82
## 15 2013-11-01 00:00:14 49.22
## times x
## 1 2013-11-01 00:00:00 48.86
## 2 2013-11-01 00:00:01 49.04
## 3 2013-11-01 00:00:02 49.50
## 4 2013-11-01 00:00:03 48.91
## 5 2013-11-01 00:00:04 48.90
## 6 2013-11-01 00:00:05 48.96
## 7 2013-11-01 00:00:06 48.48
## 8 2013-11-01 00:00:07 48.97
## 9 2013-11-01 00:00:08 48.97
## 10 2013-11-01 00:00:09 48.99
## 11 2013-11-01 00:00:10 48.35
## 12 2013-11-01 00:00:11 48.51
## 13 2013-11-01 00:00:12 49.25
## 14 2013-11-01 00:00:13 48.82
## 15 2013-11-01 00:00:14 49.22
## ...
## x == 999999 (15 flags)
## n > 3 (4 flags)
## persist(x) > 3 (4 flags)
## is.na(x) (0 flags)

Use the `MAD` (median absolute deviation) test, and add `w` to the function call to specify "windows" (note, sensor must be windowed w/ `window()` prior to using `w`)

``` r
sensor = window(sensor, 'auto')
flag(sensor, 'x == 999999', 'n > 3', 'MAD(x,w) > 3', 'MAD(x) > 3')
flag(sensor, 'x == 999999', 'persist(x) > 3', 'MAD(x,w) > 3', 'MAD(x) > 3')
```

## object of class "sensor"
## DateTime sensor.obs
## 1 2013-11-01 00:00:00 48.86
## 2 2013-11-01 00:00:01 49.04
## 3 2013-11-01 00:00:02 49.50
## 4 2013-11-01 00:00:03 48.91
## 5 2013-11-01 00:00:04 48.90
## 6 2013-11-01 00:00:05 48.96
## 7 2013-11-01 00:00:06 48.48
## 8 2013-11-01 00:00:07 48.97
## 9 2013-11-01 00:00:08 48.97
## 10 2013-11-01 00:00:09 48.99
## 11 2013-11-01 00:00:10 48.35
## 12 2013-11-01 00:00:11 48.51
## 13 2013-11-01 00:00:12 49.25
## 14 2013-11-01 00:00:13 48.82
## 15 2013-11-01 00:00:14 49.22
## times x
## 1 2013-11-01 00:00:00 48.86
## 2 2013-11-01 00:00:01 49.04
## 3 2013-11-01 00:00:02 49.50
## 4 2013-11-01 00:00:03 48.91
## 5 2013-11-01 00:00:04 48.90
## 6 2013-11-01 00:00:05 48.96
## 7 2013-11-01 00:00:06 48.48
## 8 2013-11-01 00:00:07 48.97
## 9 2013-11-01 00:00:08 48.97
## 10 2013-11-01 00:00:09 48.99
## 11 2013-11-01 00:00:10 48.35
## 12 2013-11-01 00:00:11 48.51
## 13 2013-11-01 00:00:12 49.25
## 14 2013-11-01 00:00:13 48.82
## 15 2013-11-01 00:00:14 49.22
## ...
## x == 999999 (15 flags)
## n > 3 (4 flags)
## persist(x) > 3 (4 flags)
## MAD(x,w) > 3 (129 flags)
## MAD(x) > 3 (91 flags)
4 changes: 2 additions & 2 deletions tests/testthat/test-flag_logic.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ test_that("is.na(x)", {
values = c(1,3,2,3,4,5,5,5,4,3,5,NA,5,NA)
sensor <- sensor(data.frame("DateTime"=dates,"sensor.obs"=values))
test_that("persistent", {
expect_equal(sum(calc_flags(sensor, 'n > 3', which.flagged=FALSE)), 0)
expect_equal(length(calc_flags(sensor, 'n > 2')), 3)
expect_equal(sum(calc_flags(sensor, 'persist(x) > 3', which.flagged=FALSE)), 0)
expect_equal(length(calc_flags(sensor, 'persist(x) > 2')), 3)
expect_equal(length(calc_flags(sensor, 'is.na(x)')), 2)
})
19 changes: 0 additions & 19 deletions tests/testthat/test-parse_expressions.R

This file was deleted.

0 comments on commit 4d46554

Please sign in to comment.