Merge pull request #85 from lawinslow/master
updating upstream
Jordan S Read committed Feb 23, 2016
2 parents ecd3a1b + 0bdd33c commit e61f0ec
Showing 31 changed files with 474 additions and 1,261 deletions.
8 changes: 4 additions & 4 deletions DESCRIPTION
@@ -1,21 +1,21 @@
Package: mda.lakes
Type: Package
Title: Tools for combining models, data, and processing for lakes
Version: 2.10.10
Version: 3.0.0
Date: 2015-12-03
Author: Luke Winslow, Jordan Read
Maintainer: Luke Winslow <lwinslow@usgs.gov>
Depends:
R (>= 3.0)
R (>= 3.0),
lakeattributes
Imports:
glmtools (>= 0.10.2),
jsonlite,
GLMr,
httr,
rLakeAnalyzer (>= 1.5),
insol,
plyr,
lakeattributes
plyr
Enhances: geoknife
Description: More about what it does (maybe more than one line)
License: LICENSE file
4 changes: 3 additions & 1 deletion NAMESPACE
@@ -4,6 +4,7 @@ export(LTER_NLDAS_gapper)
export(area_light_temp_threshold)
export(area_light_threshold)
export(area_temp_threshold)
export(calc_mod_obs_metric)
export(calc_toha)
export(calc_toha_stats)
export(chained.habitat.calc)
@@ -33,6 +34,7 @@ export(getScenarioKd)
export(getWstr)
export(getZmax)
export(getZmean)
export(get_driver_index)
export(get_driver_path)
export(get_ice_onoff)
export(get_wtr_chained)
@@ -46,11 +48,11 @@ export(opti_thermal_habitat)
export(populate_base_lake_nml)
export(prep_run_chained_glm)
export(prep_run_chained_glm_kd)
export(prep_run_chained_glm_secchi)
export(prep_run_glm_kd)
export(sens_seasonal_site)
import(GLMr)
import(glmtools)
import(lakeattributes)
import(rLakeAnalyzer)
import(stringr)
importFrom(insol,JD)
136 changes: 68 additions & 68 deletions R/calc.strat.days.R
@@ -1,69 +1,69 @@
#source("Libraries/GLM.functions.R")
#require(rGLM)
lyrDz <<- 0.25
#year <- '2009'


calc.strat.days <- function(WBIC,year,min.duration=30,mix.diff=1.0,all.days=1:365){
# min.duration is in TIME UNITS (currently unused in this function)
# mix.diff is in °C

folder <- paste("../supporting files/10-06Final/WBIC_",WBIC,'/',sep='')
file <- paste('output',year,'.nc4',sep='')

# does the file exist?
if (file %in% list.files(path = paste(folder))){
cat('WBIC ');cat(WBIC); cat(' processing.')
GLMnc <- getGLMnc(file,folder=folder)
cat('.')
temp.surf <- getTempGLMnc(GLMnc,lyrDz,ref='surface',z.out=0)
temp.bot <- getTempGLMnc(GLMnc,lyrDz,ref='bottom',z.out=0)
nc_close(GLMnc)
is.strat <- (temp.surf[,2]-temp.bot[,2]>mix.diff) # logical vector: TRUE where stratified
strat.DoY <- as.numeric(strftime(temp.surf[,1], format = "%j"))
# get rid of days that aren't stratified
strat.DoY <- strat.DoY[is.strat]
is.strat <- (all.days %in% strat.DoY)

return(is.strat)
} else {
return(vector(length=length(all.days)))
}
}



get.lakes <- function(year){
# if processing more than a few years, strip this down to a single column and keep it in memory (future)
dat<-read.delim('../supporting files/omg.huge.output.tsv',header = TRUE, sep = "\t")
# get rid of all rows that don't match the year
use.idx <- which(dat$year==year & !is.na(dat$dateOver8.9))
dat <- dat[use.idx,]
lake.ids <- as.character(dat$lakeid)
return(lake.ids)
}

call.lakes <- function(year){
file.out <- paste("../supporting files/is.strat_",year,'.tsv',sep='')
lake.ids <- get.lakes(year=year)
num.lakes <- length(lake.ids)
all.days=1:365
strat.count <- vector(length=length(all.days))


for (j in 1:num.lakes){
WBIC = lake.ids[j]
strat.count <- calc.strat.days(WBIC,year,all.days=all.days)+strat.count
cat('.\n')
}

write.out <- data.frame(DoY=all.days,strat.count=strat.count)
# need to name the columns
col.names <- names(write.out)
col.names <- c("DoY",paste("strat.count.",num.lakes,sep=''))

names(write.out) <- col.names
write.table(x=write.out,file=file.out,sep='\t',col.names=TRUE,quote=FALSE,row.names = FALSE)
}
#call.lakes(1998)
#call.lakes(1996)
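
A minimal usage sketch (the WBIC below is hypothetical; the call assumes the GLM NetCDF outputs exist under ../supporting files/10-06Final/):

# logical vector, one entry per day of year: TRUE where surface-bottom temp difference > mix.diff
is.strat <- calc.strat.days(WBIC = '805400', year = 1998, mix.diff = 1.0)
sum(is.strat)  # number of stratified days in 1998
call.lakes(1998)  # tally stratified-day counts across all 1998 lakes and write the .tsv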
47 changes: 47 additions & 0 deletions R/calc_mod_obs_metric.R
@@ -0,0 +1,47 @@
#' @title Calculate limno metrics for lots of lakes
#'
#' @description
#' Calculate aggregate metrics from a data frame with modeled and observed data.
#' Metric options include all available rLakeAnalyzer metrics.
#'
#' @param mod_obs_df data frame of modeled and observed values, including a site_id column
#' @param metric name of the metric to calculate (any metric supported by rLakeAnalyzer)
#'
#' @seealso \code{\link[glmtools]{compare_to_field}}
#'
#' @export
calc_mod_obs_metric = function(mod_obs_df, metric){


#for each lake
uids = unique(mod_obs_df$site_id)

all_out = data.frame()

for(lid in uids){
lake_data = subset(mod_obs_df, site_id == lid)
lake_data$site_id = NULL
bathy = getBathy(strsplit(lid, '_')[[1]][2])

lake_data = ddply(lake_data, 'DateTime', function(df){
if(nrow(na.omit(df)) >= 5){
return(na.omit(df))
}else{
return(data.frame())
}
})

#run compare to field
calc_metric = glmtools:::.compare_to_field(lake_data, bthA=bathy$areas, bthD=bathy$depths, metric=metric, as_value=TRUE)

if(nrow(calc_metric) > 0){
calc_metric$site_id = lid
#merge metrics together
all_out = rbind(all_out, calc_metric)
}

}

#output
return(all_out)

}
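
A hedged usage sketch: site_id and DateTime are the only column names fixed by the code above; Depth, Modeled_temp, and Observed_temp are assumptions about what glmtools:::.compare_to_field expects, and 'thermo.depth' stands in for any rLakeAnalyzer metric.

# one profile with >= 5 complete rows, so it survives the na.omit/ddply filter
mod_obs <- data.frame(site_id = 'WBIC_805400',  # '<prefix>_<id>' form implied by strsplit(lid, '_')
                      DateTime = rep(as.POSIXct('2009-07-01'), 6),
                      Depth = 0:5,                               # assumed column name
                      Modeled_temp = c(24, 23, 20, 15, 11, 10),  # assumed column name
                      Observed_temp = c(25, 23, 19, 14, 11, 10)) # assumed column name
metric_by_lake <- calc_mod_obs_metric(mod_obs, metric = 'thermo.depth')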
69 changes: 54 additions & 15 deletions R/get_driver_nhd.R
@@ -1,5 +1,12 @@
# Functions for NHD-linked drivers (NLDAS and Hostetler)
#
#
#

get_driver_nhd = function(id, driver_name, loc_cache, timestep){

hostetler_names = c('ECHAM5', 'CM2.0', 'GENMOM')

#get index
indx = get_driver_index(driver_name, loc_cache)

@@ -30,7 +37,11 @@ get_driver_nhd = function(id, driver_name, loc_cache, timestep){
#create and save formatted dataframe
all_drivers = Reduce(function(...) merge(..., by='DateTime'), lapply(ls(driver_env), function(x)driver_env[[x]]))

glm_drivers = nldas_to_glm(all_drivers)
all_drivers = na.omit(all_drivers)

glm_drivers = drivers_to_glm(all_drivers)



if(timestep=='daily'){
daily = trunc(as.POSIXct(glm_drivers$time), units='days')
@@ -50,7 +61,19 @@ get_driver_nhd = function(id, driver_name, loc_cache, timestep){
return(dest)
}

get_driver_index = function(driver_name, loc_cache){
#' @title Return the driver file index table
#'
#' @inheritParams get_driver_path
#'
#' @description
#' Accesses and returns the driver file index. Can be used to get a list of
#' all available driver data for a given driver.
#'
#' @examples
#' unique(get_driver_index('NLDAS')$id)
#'
#' @export
get_driver_index = function(driver_name, loc_cache=TRUE){
#see if index file exists already
index_url = paste0(base_url, 'drivers_GLM_', driver_name, '/driver_index.tsv')
dest = file.path(tempdir(), driver_name, 'driver_index.tsv')
@@ -67,27 +90,43 @@ get_driver_index = function(driver_name, loc_cache){
return(read.table(dest, sep='\t', header=TRUE, as.is=TRUE))
}

nldas_to_glm = function(nldas_data){
drivers_to_glm = function(driver_df){

## convert and downsample wind

nldas_data$WindSpeed = sqrt(nldas_data$ugrd10m^2 + nldas_data$vgrd10m^2)
nldas_data$ShortWave = nldas_data$dswrfsfc
nldas_data$LongWave = nldas_data$dlwrfsfc
nldas_data$AirTemp = nldas_data$tmp2m - 273.15 #convert K to deg C
nldas_data$Rain = nldas_data$apcpsfc*24/1000 #convert to m/day rate
nldas_data$RelHum = 100*nldas_data$spfh2m/qsat(nldas_data$tmp2m-273.15, nldas_data$pressfc*0.01)
driver_df$WindSpeed = sqrt(driver_df$ugrd10m^2 + driver_df$vgrd10m^2)
driver_df$ShortWave = driver_df$dswrfsfc
driver_df$LongWave = driver_df$dlwrfsfc
driver_df$Rain = driver_df$apcpsfc*24/1000 #convert to m/day rate

if('tmp2m' %in% names(driver_df)){
driver_df$AirTemp = driver_df$tmp2m - 273.15 #convert K to deg C
}else if('airtemp' %in% names(driver_df)){
driver_df$AirTemp = driver_df$airtemp #no conversion needed; already deg C
}else{
stop('Unable to find temperature data.\nDriver service must have temp data (named tmp2m or airtemp).')
}

if('relhum' %in% names(driver_df)){
driver_df$RelHum = 100*driver_df$relhum
}else if('spfh2m' %in% names(driver_df)){
driver_df$RelHum = 100*driver_df$spfh2m/qsat(driver_df$tmp2m-273.15, driver_df$pressfc*0.01)
}else{
stop('Unable to find humidity data.\nDriver service must have humidity data (named relhum or spfh2m).')
}


#now deal with snow
nldas_data$Snow = 0
driver_df$Snow = 0

# assume a 10:1 snow:water volume ratio (snow density ~1/10 that of water)
nldas_data$Snow[nldas_data$AirTemp < 0] = nldas_data$Rain[nldas_data$AirTemp < 0]*10
nldas_data$Rain[nldas_data$AirTemp < 0] = 0
driver_df$Snow[driver_df$AirTemp < 0] = driver_df$Rain[driver_df$AirTemp < 0]*10
driver_df$Rain[driver_df$AirTemp < 0] = 0

#convert DateTime to properly formatted string
nldas_data$time = nldas_data$DateTime
nldas_data$time = format(nldas_data$time,'%Y-%m-%d %H:%M:%S')
driver_df$time = driver_df$DateTime
driver_df$time = format(driver_df$time,'%Y-%m-%d %H:%M:%S')

return(nldas_data[order(nldas_data$time), c('time','ShortWave','LongWave','AirTemp','RelHum','WindSpeed','Rain','Snow')])
return(driver_df[order(driver_df$time), c('time','ShortWave','LongWave','AirTemp','RelHum','WindSpeed','Rain','Snow')])
}
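
To see the new branches fire, a toy sketch (it assumes a Hostetler-style frame where airtemp is already in deg C and relhum is a 0-1 fraction, as the 100* scaling above implies; NLDAS-style tmp2m/spfh2m columns would take the other branches):

df <- data.frame(DateTime = as.POSIXct('2016-02-23', tz = 'UTC'),
                 ugrd10m = 2, vgrd10m = 1,       # wind components -> WindSpeed = sqrt(5)
                 dswrfsfc = 200, dlwrfsfc = 300, # shortwave / longwave
                 apcpsfc = 0.5,                  # precip -> Rain = 0.5*24/1000 m/day
                 airtemp = -5, relhum = 0.8)     # hits the 'airtemp' and 'relhum' branches
drivers_to_glm(df)  # AirTemp = -5, RelHum = 80; Rain becomes Snow because AirTemp < 0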

8 changes: 7 additions & 1 deletion R/get_driver_path.R
@@ -1,4 +1,3 @@

#'@title Return driver file location for given lake
#'
#'@description
@@ -13,6 +12,13 @@
#'
#'@author Luke Winslow
#'
#'@examples
#'get_driver_path('nhd_120052892')
#'get_driver_path('nhd_120052892', 'ECHAM5')
#'get_driver_path('nhd_120052892', 'CM2.0')
#'
#'
#'get_driver_path('WBIC_1881900')
#'
#'@export
get_driver_path = function(id, driver_name='NLDAS', loc_cache=TRUE, timestep='daily', ...){