Merge pull request #92 from lawinslow/master
Code for dataset RC1
Luke Winslow authored Jul 21, 2016
2 parents 630bebe + 271c0aa commit 4682732
Showing 18 changed files with 1,176 additions and 251 deletions.
7 changes: 5 additions & 2 deletions DESCRIPTION
@@ -1,7 +1,7 @@
Package: mda.lakes
Type: Package
Title: Tools for combining models, data, and processing for lakes
Version: 4.1.2
Version: 4.2.1
Date: 2015-12-03
Author: Luke Winslow, Jordan Read
Maintainer: Luke Winslow <lwinslow@usgs.gov>
@@ -16,7 +16,10 @@ Imports:
rLakeAnalyzer (>= 1.5),
insol,
plyr,
accelerometry
accelerometry,
lubridate,
dplyr,
tidyr
Enhances: geoknife
Description: More about what it does (maybe more than one line)
License: LICENSE file
2 changes: 2 additions & 0 deletions NAMESPACE
@@ -43,11 +43,13 @@ export(populate_base_lake_nml)
export(prep_run_glm_kd)
export(sens_seasonal_site)
export(set_driver_url)
export(summarize_notaro)
import(GLMr)
import(glmtools)
import(lakeattributes)
import(lubridate)
import(rLakeAnalyzer)
import(tidyr)
importFrom(accelerometry,rle2)
importFrom(insol,JD)
importFrom(insol,insolation)
13 changes: 11 additions & 2 deletions R/get_driver_nhd.R
@@ -27,6 +27,11 @@ get_driver_nhd = function(id, driver_name, loc_cache, timestep){

if(substr(driver_url, 0,7) == 'file://'){
dest = sub('file://', '', driver_url)
#if driver url is a zip file, pull out of zip
}else if(substr(pkg_info$dvr_url, nchar(pkg_info$dvr_url)-3,nchar(pkg_info$dvr_url)) == '.zip'){

unzip(pkg_info$dvr_url, files = paste0('drivers_GLM_', driver_name, '/', fname), exdir=dirname(dest), junkpaths=TRUE)

}else{
if(!download_helper(driver_url, dest)){
stop('failure downloading ', fname, '\n')
@@ -99,8 +104,12 @@ get_driver_index = function(driver_name, loc_cache=TRUE){
return(read.table(dest, sep='\t', header=TRUE, as.is=TRUE))
}

if(!download_helper(index_url, dest)){
stop('driver_index.tsv: unable to download for driver data:', driver_name)
if(substr(pkg_info$dvr_url, nchar(pkg_info$dvr_url)-3,nchar(pkg_info$dvr_url)) == '.zip'){
unzip(pkg_info$dvr_url, files = paste0('drivers_GLM_', driver_name, '/driver_index.tsv'), exdir=dirname(dest), junkpaths=TRUE)
}else{
if(!download_helper(index_url, dest)){
stop('driver_index.tsv: unable to download for driver data:', driver_name)
}
}

return(read.table(dest, sep='\t', header=TRUE, as.is=TRUE))
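Both hunks follow the same pattern: when the configured driver URL ends in '.zip', the needed file is pulled straight out of the archive instead of being fetched with download_helper(). A minimal standalone sketch of that check and extraction; the function name and arguments are illustrative stand-ins for pkg_info$dvr_url and the cache paths computed inside the package:

# dvr_url and dest stand in for pkg_info$dvr_url and the cached file path
extract_from_zip_if_needed = function(dvr_url, driver_name, fname, dest){
	is_zip = substr(dvr_url, nchar(dvr_url)-3, nchar(dvr_url)) == '.zip'
	if(is_zip){
		# extract only the requested driver file; junkpaths=TRUE drops the
		# internal 'drivers_GLM_<driver_name>/' folder so the file lands
		# directly in dirname(dest)
		unzip(dvr_url, files=paste0('drivers_GLM_', driver_name, '/', fname),
			exdir=dirname(dest), junkpaths=TRUE)
	}
	return(is_zip)
}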
2 changes: 1 addition & 1 deletion R/necsc_thermal_metrics_core.R
@@ -20,7 +20,7 @@ necsc_thermal_metrics_core = function(run.path, site_id){
nml.file = file.path(run.path, 'glm2.nml')

all_wtr = get_temp(nc.file, reference='surface')
bottom_wtr = get_temp(nc.file, reference='bottom', z_out=0.1) #0.1m from bottom
bottom_wtr = get_temp(nc.file, reference='bottom', z_out=0) #bottom temperature
surface_wtr = get_temp(nc.file, reference='surface', z_out=0) #surface temps
all_ice = get_ice(nc.file)
all_raw_wtr = get_raw(nc.file, 'temp')
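The edit above moves the bottom reference from 0.1 m above the sediment to the bottom itself. For comparison, the two glmtools calls side by side, assuming a finished GLM run's output.nc (the path is illustrative):

library(glmtools)

nc.file = 'output.nc' # illustrative path to any GLM output file

# old behavior: temperature 0.1 m above the lake bottom
near_bottom_wtr = get_temp(nc.file, reference='bottom', z_out=0.1)

# new behavior: temperature at the bottom itself
bottom_wtr = get_temp(nc.file, reference='bottom', z_out=0)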
5 changes: 3 additions & 2 deletions R/populate_base_lake_nml.R
@@ -21,7 +21,9 @@ populate_base_lake_nml = function(site_id, kd=get_kd_avg(site_id)$kd_avg, nml_te
#get default template
nml_obj = read_nml(nml_template)

initZ = c(0,0.2, zmax);
#make last depth slightly less than total depth to avoid
# precision-based GLM errors
initZ = c(0,0.2, (zmax-0.1));
initT = c(3,4,4);
initS = c(0,0,0);

@@ -52,7 +54,6 @@ populate_base_lake_nml = function(site_id, kd=get_kd_avg(site_id)$kd_avg, nml_te
'ch'=0.0014,
'cd'=0.0013,
'rain_sw'=FALSE,
'snow_sw'=TRUE,
'num_inflows'=0,
'outl_elvs'=1,
'outflow_fl'='outflow.csv',
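The initial-profile tweak keeps the deepest layer 0.1 m short of the lake bottom, per the comment above, so that GLM's comparison of the profile against the lake depth cannot fail on floating-point precision. A small sketch for a hypothetical 10 m lake:

zmax = 10 # hypothetical maximum depth (m)

# old: deepest initial layer exactly at zmax, which can trigger
# precision-based GLM errors
initZ_old = c(0, 0.2, zmax)

# new: stop 0.1 m short of the bottom
initZ_new = c(0, 0.2, zmax - 0.1)

initT = c(3, 4, 4) # matching initial temperatures (deg C)
initS = c(0, 0, 0) # matching initial salinities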
4 changes: 2 additions & 2 deletions R/prep_run_glm_kd.R
@@ -30,7 +30,7 @@
#'@export
prep_run_glm_kd <- function(site_id, path, years,
kd = getClarity(site_id, default.if.null=TRUE),
nml_args=NULL, sed_heat=FALSE){
nml_args=NULL, sed_heat=FALSE, verbose=FALSE){


nml_obj = populate_base_lake_nml(site_id, kd=kd)
@@ -54,7 +54,7 @@ prep_run_glm_kd <- function(site_id, path, years,

nml_out_path = file.path(path, "glm2.nml")
write_nml(nml_obj, nml_out_path)
out_val = run_glm(path)
out_val = run_glm(path, verbose=verbose)


return(out_val)
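The new verbose flag is passed straight through to run_glm(), so GLM's console output can be surfaced when a lake fails silently. A usage sketch under assumed inputs; the site id, path, and years are illustrative:

library(mda.lakes)

# echo GLM's stdout for one run; the default verbose=FALSE keeps it quiet
out_val = prep_run_glm_kd(site_id='nhd_1099136', # illustrative id
	path=tempdir(),
	years=1980:1985,
	verbose=TRUE)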
68 changes: 68 additions & 0 deletions R/summarize_var_notaro.R
@@ -0,0 +1,68 @@
#' @title Summary stats for Notaro
#'
#' @param nc.file path to nc file
#'
#' @return data.frame with summary statistics
#'
#'
#' @import tidyr
#' @import lubridate
#'
#' @export
summarize_notaro = function(nc.file){
library(glmtools)
exclude.vars <- c("extc_coef", "salt", "precip", "wind")
var.names <- as.character(sim_vars(nc.file)$name)

lake.data = lapply(var.names[!var.names %in% exclude.vars], function(x) summarize_var_notaro(nc.file, x))
df.data <- data.frame()
for (i in 1:length(lake.data)){
df.data <- rbind(df.data, lake.data[[i]])
}
return(df.data)
}


# for the 1D vars, get depths and summarize, for annual scale
summarize_var_notaro <- function(nc.file, var.name){
library(dplyr)
library(tidyr)
library(lubridate)

unit <- sim_var_units(nc.file, var.name)
is.1D <- glmtools:::.is_heatmap(nc.file, var.name)
value.name <- sprintf('%s%s', var.name, ifelse(unit=='', '',paste0(' (',unit,')')))
if (is.1D){
rename_depths <- function(depths){
unlist(lapply(strsplit(depths, '[_]'), function(x) sprintf('%s_%s', round(eval(parse(text=head(tail(x,2),1))), digits = 2), tail(x,1))))
}
get_depth <- function(names){
as.numeric(unname(unlist(sapply(names, function(x) strsplit(x,'[_]')[[1]][1]))))
}
get_stat <- function(names){
unname(unlist(sapply(names, function(x) strsplit(x,'[_]')[[1]][2])))
}
var <- get_var(nc.file, var.name, reference='surface') %>%
mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>%
select(-DateTime, -tz, -base.date) %>% select(doy, everything()) %>% group_by(doy) %>%
summarize_each(c('mean','sd')) %>%
setNames(c('doy',rename_depths(names(.)[-1L]))) %>% gather(key = doy) %>%
setNames(c('doy','depth_stat','value')) %>%
mutate(depth=get_depth(depth_stat), statistic=get_stat(depth_stat), variable=value.name) %>%
select(doy, depth, statistic, value, variable) %>%
filter(doy < 366)
} else {
var <- get_var(nc.file, var.name)%>%
mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select_('doy', var.name) %>% group_by(doy) %>%
summarize_each(c('mean','sd')) %>% gather(key = doy) %>%
setNames(c('doy','statistic','value')) %>%
mutate(depth=NA, variable=value.name) %>%
select(doy, depth, statistic, value, variable) %>%
filter(doy < 366)
}

return(var)
}
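A usage sketch for the new export, assuming a finished run's output.nc in the working directory (the path is illustrative):

library(mda.lakes)

# long-format day-of-year mean/sd summaries for every simulated variable
# except the excluded ones (extc_coef, salt, precip, wind)
stats = summarize_notaro('output.nc')
head(stats) # columns: doy, depth, statistic, value, variable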

124 changes: 22 additions & 102 deletions demo/cluster_cont_calibrate_bias.R
@@ -11,7 +11,7 @@ driver_url = paste0('http://', (Sys.info()["nodename"]),':4041/')
library(parallel)

#lets try 50 to start
c1 = makePSOCKcluster(paste0('licon', 1:50), manual=TRUE, port=4046)
c1 = makePSOCKcluster(paste0('licon', 1:60), manual=TRUE, port=4045)

clusterExport(c1, varlist = 'local_url')
clusterExport(c1, varlist = 'driver_url')
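The pool above is a manually started PSOCK cluster: with manual=TRUE, makePSOCKcluster() prints the command each licon node must run to connect back on the given port instead of launching the workers itself. A minimal local sketch of the same mechanics, with illustrative worker count and port:

library(parallel)

# two localhost workers in place of 60 remote 'licon' nodes; omitting
# manual=TRUE lets makePSOCKcluster start the workers directly
c1 = makePSOCKcluster(rep('localhost', 2), port=11000)
clusterCall(c1, function() Sys.info()[['nodename']])
stopCluster(c1)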
@@ -27,7 +27,7 @@ clusterCall(c1, function(){library(devtools)})
#glmr_install = clusterCall(c1, function(){install.packages('glmtools', repos=c('http://owi.usgs.gov/R', 'http://cran.rstudio.com'))})
glmr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/GLMr_3.1.10.tar.gz'))})
glmtools_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/glmtools_0.13.0.tar.gz'))})
lakeattr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/lakeattributes_0.8.4.tar.gz'))})
lakeattr_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/lakeattributes_0.8.6.tar.gz'))})
mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_3.0.4.tar.gz'))})

library(lakeattributes)
@@ -36,142 +36,62 @@ library(dplyr)
library(glmtools)

## setup run function

run_cal = function(site_id, years=1979:2012, driver_function=get_driver_path, nml_args=list(), datasource=NA){


library(lakeattributes)
library(mda.lakes)
library(dplyr)
library(glmtools)

fastdir = tempdir()
if(file.exists('/mnt/ramdisk')){
fastdir = '/mnt/ramdisk'
}

secchi_conv = 1.7
tryCatch({


run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1)))
cat(run_dir, '\n')
dir.create(run_dir)

#rename for dplyr
nhd_id = site_id

data(wtemp)
obs = filter(wtemp, site_id == nhd_id) %>%
transmute(DateTime=date, Depth=depth, temp=wtemp)

#having a weird issue with resample_to_field, make unique
obs = obs[!duplicated(obs[,1:2]), ]

write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)

#get driver data
driver_path = driver_function(site_id)
driver_path = gsub('\\\\', '/', driver_path)


kds = get_kd_best(site_id, years=years, datasource = datasource)

kd_avg = secchi_conv/mean(kds$secchi_avg, na.rm=TRUE)

prep_run_glm_kd(site_id=site_id,
path=run_dir,
years=years,
kd=kd_avg,
nml_args=c(list(
dt=3600, subdaily=FALSE, nsave=24,
timezone=-6,
csv_point_nlevs=0,
snow_albedo_factor=1.1,
meteo_fl=driver_path,
cd=getCD(site_id, method='Hondzo')),
nml_args))

cal.data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
cal.data$site_id = site_id

unlink(run_dir, recursive=TRUE)

return(cal.data)

}, error=function(e){
unlink(run_dir, recursive=TRUE)
return(paste(e$call, e$message, Sys.info()["nodename"]))
})
}
source('demo/common_running_functions.R')


driver_fun = function(site_id){
nldas = read.csv(get_driver_path(site_id, driver_name = 'NLDAS', timestep = 'daily'), header=TRUE)
drivers = driver_nldas_wind_debias(nldas)
drivers = driver_add_burnin_years(drivers, nyears=2)
drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off
#drivers$time = drivers$time - as.difftime(2, units='days')
driver_save(drivers)
}


#we want only ramdisk enabled nodes
#ramdisk = clusterCall(c1, function(){file.exists('/mnt/ramdisk')})
#c1 = c1[unlist(ramdisk)]
data(wtemp)
to_run = unique(intersect(intersect(wtemp$site_id, zmax$site_id), secchi$site_id))
#to_run = to_run[to_run %in% read.table('~/managed_lakes_wilma_nhd.tsv', sep='\t', header=TRUE, as.is=TRUE)$id]

run_name = '2016-04-06_cal_NLDAS'
run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"
run_name = '2016-04-12_cal_2014stop_NLDAS'
run_comment = "standard, all Secchi 1980-2014 run to compare to GCMs"

clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})
#clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})

out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)

#out = clusterApplyLB(c1, to_run[1:50], run_cal)

sprintf('%i lakes ran\n', sum(unlist(lapply(out, inherits, what='data.frame'))))
out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(),
years=1980:2014, secchi_function=function(site_id){secchi_target_month(site_id, target_month=5:6)})

outl = split_run_output(out)
out_err = outl[['out_err']]
nmls = outl[['nmls']]

##results
out_df = do.call('rbind', out[unlist(lapply(out, inherits, what='data.frame'))])
write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)
write.table(outl[['out_df']], paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)

out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
save(nmls, file=paste0('c:/WiLMA/results/', run_name, 'nml.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))


sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE))
mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE)
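These two lines are the script's skill metrics: root-mean-square error and mean bias over paired observed and modeled temperatures. A self-contained check of the formulas on hypothetical numbers:

# hypothetical paired temperatures, just to exercise the two metrics
out_df = data.frame(Observed_temp=c(12.0, 15.5, 18.2, 21.0),
	Modeled_temp=c(11.4, 16.1, 18.0, 22.3))

sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE)) # RMSE, ~0.78
mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE) # bias, ~-0.28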

run_comment = "Markfort sheltering model for full calibration run with all available wtemp, secchi and depth. 1980-2014"

clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})

out = clusterApplyLB(c1, to_run, run_cal, driver_function = driver_fun, nml_args=list(), years=1980:2014, datasource=NA)
parent = '570e8688e4b0ef3b7ca24d8e'
files = c(Sys.glob(paste0('c:/WiLMA/results/', run_name, '.*')), Sys.glob(paste0('c:/WiLMA/results/', run_name, 'nml.*')))

#out = clusterApplyLB(c1, to_run[1:50], run_cal)

sprintf('%i lakes ran\n', sum(unlist(lapply(out, inherits, what='data.frame'))))
library(sbtools)
authenticate_sb('lwinslow@usgs.gov', pass)
itm = item_create(parent, title=run_name)

item_append_files(itm, files)

##results
out_df = do.call('rbind', out[unlist(lapply(out, inherits, what='data.frame'))])
write.table(out_df, paste0('c:/WiLMA/results/', run_name, '.tsv'), sep='\t', row.names=FALSE)

out_err = out[!unlist(lapply(out, inherits, what='data.frame'))]
save(out_err, file = paste0('c:/WiLMA/results/', run_name, '.Rdata'))

rmarkdown::render('demo/document_calibration_overview_template.Rmd', output_file=paste0(run_name,'.html'),
output_dir='c:/WiLMA/results/', params=list(out_df=out_df, run_message=run_comment))


sqrt(mean((out_df$Observed_temp - out_df$Modeled_temp)^2, na.rm=TRUE))
mean(out_df$Observed_temp - out_df$Modeled_temp, na.rm=TRUE)

################################################################################
## ACCESS