From 62da805ecc88b85e6f10c12fbbb115f0a00497ef Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 22 Jul 2016 09:40:08 -0500
Subject: [PATCH 01/42] separate notaro sim output by year range

---
 demo/mpi_hab_out.R | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index bc5db51..41f6c57 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -110,19 +110,19 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
     hansen_habitat = hansen_habitat_calc(run_dir, site_id)
 
-    #notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc'))
+    notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc'))
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
     unlink(run_dir, recursive=TRUE)
 
-    #notaro_metrics$site_id = site_id
+    notaro_metrics$site_id = site_id
 
     all_data = list(wtr=wtr_all,
                     core_metrics=core_metrics,
                     hansen_habitat=hansen_habitat,
                     site_id=site_id,
-                    #notaro_metrics=notaro_metrics,
+                    notaro_metrics=notaro_metrics,
                     nml=nml)
 
     return(all_data)
@@ -152,8 +152,8 @@ getnext = function(fname){
     return(fname)
 }
 
-wrapup_output = function(out, run_name, years){
-    out_dir = file.path('.', run_name)
+wrapup_output = function(out, out_dir, years){
+    #out_dir = file.path('.', run_name)
 
     run_exists = file.exists(out_dir)
 
@@ -173,13 +173,15 @@ wrapup_output = function(out, run_name, years){
     core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
     core_metrics = subset(core_metrics, year %in% years)
 
-    #notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+    notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
 
     model_config = lapply(good_data, function(x){x$nml})
 
+    notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
+    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-    #write.table(notaro_metrics, file.path(out_dir, 'notaro_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+
 
     save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
     save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
@@ -205,8 +207,9 @@ driver_fun = function(site_id, gcm){
 config = read.table('config', header=TRUE, as.is=TRUE)
 
 driver_name = config$drivername
-
 driver_url = config$driverurl
+out_dir = config$outdir
+
 to_run = as.character(unique(zmax$site_id))
 to_run = split(to_run, cut(seq_along(to_run), mpisize, labels = FALSE))[[mpirank+1]]
@@ -232,7 +235,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, run_name, years=ygroup)
+    wrapup_output(out, file.path(out_dir, run_name), years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
    cat('on to the next\n')
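A minimal sketch of how the year-range file naming introduced in patch 01 resolves (directory and years are illustrative):

    years = 1980:1999
    notaro_file = file.path('out', paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
    ## notaro_file == "out/notaro_metrics_1980_1999.tsv"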
From 15390d54a03bcc6dd9fc42ada4600727f192a533 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 22 Jul 2016 09:41:01 -0500
Subject: [PATCH 02/42] version increment

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5f358ac..ef4368b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.1
+Version: 4.2.2
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

From 15f6c285182c51372ae0d8d7cdf8218deca974e0 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 22 Jul 2016 10:06:49 -0500
Subject: [PATCH 03/42] need driver name in there too

---
 demo/mpi_hab_out.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 41f6c57..7be7850 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -208,7 +208,7 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 
 driver_name = config$drivername
 driver_url = config$driverurl
-out_dir = config$outdir
+out_dir = file.path(config$outdir, driver_name)
 
 to_run = as.character(unique(zmax$site_id))

From c51b63ec287659a336b2a385ef6e2c780bdacd59 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 12:37:47 -0500
Subject: [PATCH 04/42] debugging some YETI issues

---
 DESCRIPTION        | 2 +-
 demo/mpi_hab_out.R | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ef4368b..886453f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.2
+Version: 4.2.3
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 7be7850..2f972ae 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -134,13 +134,6 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
 }
 
-driver_fun = function(site_id){
-    nldas = read.csv(get_driver_path(site_id, driver_name = 'NLDAS'), header=TRUE)
-    drivers = driver_nldas_wind_debias(nldas)
-    drivers = driver_add_burnin_years(drivers, nyears=2)
-    drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off
-    driver_save(drivers)
-}
 
 getnext = function(fname){
     i=0
@@ -224,7 +217,7 @@ set_driver_url(driver_url)
 run_name = paste0(mpirank)
 
 ##1980-1999
-runsplits = split(1:length(to_run), floor(1:length(to_run)/1e3))
+runsplits = split(1:length(to_run), floor(1:length(to_run)/10))
 
 yeargroups = list(1980:1999, 2020:2039, 2080:2099)
 for(ygroup in yeargroups){
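Patch 04 shrinks the per-iteration work unit from 1000 lakes to 10 for debugging. A quick sketch of how that grouping behaves (toy site list; note the first group holds only 9 elements because floor(1/10) through floor(9/10) are all 0):

    to_run = paste0('lake_', 1:25)
    runsplits = split(1:length(to_run), floor(1:length(to_run)/10))
    lengths(runsplits)
    ##  0  1  2
    ##  9 10  6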
From 0f1304c9f16884dbc8a8f97f61258e0fda7bab22 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 12:42:44 -0500
Subject: [PATCH 05/42] more specific details back

---
 DESCRIPTION        |  2 +-
 demo/mpi_hab_out.R | 69 ++++++++++++++++++++++++----------------------
 2 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 886453f..fe50c14 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.3
+Version: 4.2.4
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 2f972ae..7885739 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -147,40 +147,43 @@ getnext = function(fname){
 
 wrapup_output = function(out, out_dir, years){
     #out_dir = file.path('.', run_name)
+    tryCatch({
+        run_exists = file.exists(out_dir)
+
+        if(!run_exists) {dir.create(out_dir, recursive=TRUE)}
+
+        good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+        bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+
+        sprintf('%i lakes ran\n', length(good_data))
+        dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)})
+        #drop the burn-in years
+        dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))})
+
+        hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]}))
+        hansen_habitat = subset(hansen_habitat, year %in% years)
+
+        core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
+        core_metrics = subset(core_metrics, year %in% years)
+
+        notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+
+        model_config = lapply(good_data, function(x){x$nml})
+
+        notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
+        write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+        write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+        write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+
+
+        save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
+        save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
+        save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata')))
+
+        rm(out, good_data, dframes)
+
+    }, error=function(d){traceback()})
 
     gc()
 }
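The good/bad partition inside wrapup_output() keys off the shape of each worker's return value: failures come back as a list carrying an 'error' element, or as NULL. A self-contained sketch of the same filter, with hypothetical results standing in for model output:

    out = list(list(wtr='...', site_id='a'),
               list(error=simpleError('GLM died'), 'b'),
               NULL)
    is_bad = unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))
    good_data = out[!is_bad]   # first element only
    bad_data  = out[is_bad]    # the error result and the NULL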
From e2b25c1edc187af3bec0e6b9629e81c2fea36b16 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 13:33:34 -0500
Subject: [PATCH 06/42] try without using projects output

---
 DESCRIPTION        |  2 +-
 demo/mpi_hab_out.R | 75 ++++++++++++++++++++++------------------------
 2 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fe50c14..acb4049 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.4
+Version: 4.2.5
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 7885739..e1386fb 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -145,45 +145,42 @@ getnext = function(fname){
     return(fname)
 }
 
-wrapup_output = function(out, out_dir, years){
-    #out_dir = file.path('.', run_name)
-    tryCatch({
-        run_exists = file.exists(out_dir)
-
-        if(!run_exists) {dir.create(out_dir, recursive=TRUE)}
-
-        good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
-        bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
-
-        sprintf('%i lakes ran\n', length(good_data))
-        dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)})
-        #drop the burn-in years
-        dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))})
-
-        hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]}))
-        hansen_habitat = subset(hansen_habitat, year %in% years)
-
-        core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
-        core_metrics = subset(core_metrics, year %in% years)
-
-        notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
-
-        model_config = lapply(good_data, function(x){x$nml})
-
-        notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
-        write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-        write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-        write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-
-
-        save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
-        save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
-        save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata')))
-
-        rm(out, good_data, dframes)
-
-    }, error=function(d){traceback()})
+wrapup_output = function(out, run_name, years){
+    out_dir = file.path('.', run_name)
+
+    run_exists = file.exists(out_dir)
+
+    if(!run_exists) {dir.create(out_dir, recursive=TRUE)}
+
+    good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+    bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+
+    sprintf('%i lakes ran\n', length(good_data))
+    dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)})
+    #drop the burn-in years
+    dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))})
+
+    hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]}))
+    hansen_habitat = subset(hansen_habitat, year %in% years)
+
+    core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
+    core_metrics = subset(core_metrics, year %in% years)
+
+    notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+
+    model_config = lapply(good_data, function(x){x$nml})
+
+    notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
+    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+
+
+    save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
+    save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
+    save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata')))
 
+    rm(out, good_data, dframes)
     gc()
 }
@@ -231,7 +228,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, file.path(out_dir, run_name), years=ygroup)
+    wrapup_output(out, run_name, years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
     cat('on to the next\n')
From ee2452ee56925a15201bd05ae8fc712d83f0f968 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 15:31:04 -0500
Subject: [PATCH 07/42] some issue with gather when running on linux, trying
 to fix

---
 R/summarize_var_notaro.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/summarize_var_notaro.R b/R/summarize_var_notaro.R
index fff35e6..5a41135 100644
--- a/R/summarize_var_notaro.R
+++ b/R/summarize_var_notaro.R
@@ -47,7 +47,7 @@ summarize_var_notaro <- function(nc.file, var.name){
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select(-DateTime, -tz, -base.date) %>%
         select(doy, everything()) %>% group_by(doy) %>% summarize_each(c('mean','sd')) %>%
-        setNames(c('doy',rename_depths(names(.)[-1L]))) %>% gather(key = doy) %>%
+        setNames(c('doy',rename_depths(names(.)[-1L]))) %>% ##gather(key = doy) %>%
         setNames(c('doy','depth_stat','value')) %>%
         mutate(depth=get_depth(depth_stat), statistic=get_stat(depth_stat), variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%
@@ -56,7 +56,7 @@ summarize_var_notaro <- function(nc.file, var.name){
     var <- get_var(nc.file, var.name)%>%
         mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select_('doy', var.name) %>%
         group_by(doy) %>%
-        summarize_each(c('mean','sd')) %>% gather(key = doy) %>%
+        summarize_each(c('mean','sd')) %>% ##gather(key = doy) %>%
         setNames(c('doy','statistic','value')) %>%
         mutate(depth=NA, variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%

From b020d26210b59c5f160b348ce0ab1cb77bcd4a12 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 09:58:24 -0500
Subject: [PATCH 08/42] fixing freaking tibble caused issue on *nix

---
 DESCRIPTION              | 2 +-
 R/summarize_var_notaro.R | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index acb4049..7829ce0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.5
+Version: 4.2.6
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/summarize_var_notaro.R b/R/summarize_var_notaro.R
index 5a41135..9575a03 100644
--- a/R/summarize_var_notaro.R
+++ b/R/summarize_var_notaro.R
@@ -47,7 +47,8 @@ summarize_var_notaro <- function(nc.file, var.name){
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select(-DateTime, -tz, -base.date) %>%
         select(doy, everything()) %>% group_by(doy) %>% summarize_each(c('mean','sd')) %>%
-        setNames(c('doy',rename_depths(names(.)[-1L]))) %>% ##gather(key = doy) %>%
+        setNames(c('doy',rename_depths(names(.)[-1L]))) %>%
+        as.data.frame %>% gather(key = doy) %>%
         setNames(c('doy','depth_stat','value')) %>%
         mutate(depth=get_depth(depth_stat), statistic=get_stat(depth_stat), variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%
@@ -56,7 +57,7 @@ summarize_var_notaro <- function(nc.file, var.name){
     var <- get_var(nc.file, var.name)%>%
         mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select_('doy', var.name) %>%
         group_by(doy) %>%
-        summarize_each(c('mean','sd')) %>% ##gather(key = doy) %>%
+        summarize_each(c('mean','sd')) %>% as.data.frame %>% gather(doy) %>%
         setNames(c('doy','statistic','value')) %>%
         mutate(depth=NA, variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%
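The as.data.frame inserted in patch 08 is what makes gather() usable again: summarize_each() returns a grouped tbl_df, and tidyr's gather() on that tibble misbehaved on the *nix nodes, while a plain data.frame reshapes cleanly. A toy illustration of the idea (not the notaro pipeline itself):

    library(dplyr)
    library(tidyr)
    wide = data.frame(doy=1:2, wtr_0.5=c(4.1, 5.2), wtr_1.5=c(3.8, 4.6))
    long = wide %>% as.data.frame %>% gather(key=depth_stat, value=value, -doy)
    ## 4 rows: doy, depth_stat (wtr_0.5/wtr_1.5), value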
From 9aa0a4e4017a8e280e41463f591b0efceb18fc28 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 10:12:06 -0500
Subject: [PATCH 09/42] return to projects dir outputs

---
 DESCRIPTION        | 2 +-
 demo/mpi_hab_out.R | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7829ce0..5aebf92 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.6
+Version: 4.2.7
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index e1386fb..da6c9a3 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -145,8 +145,8 @@ getnext = function(fname){
     return(fname)
 }
 
-wrapup_output = function(out, run_name, years){
-    out_dir = file.path('.', run_name)
+wrapup_output = function(out, out_dir, years){
+    #out_dir = file.path('.', run_name)
 
     run_exists = file.exists(out_dir)
 
@@ -217,7 +217,7 @@ set_driver_url(driver_url)
 run_name = paste0(mpirank)
 
 ##1980-1999
-runsplits = split(1:length(to_run), floor(1:length(to_run)/10))
+runsplits = split(1:length(to_run), floor(1:length(to_run)/1e3))
 
 yeargroups = list(1980:1999, 2020:2039, 2080:2099)
 for(ygroup in yeargroups){
@@ -228,7 +228,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, run_name, years=ygroup)
+    wrapup_output(out, system.file(out_dir, run_name), years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
     cat('on to the next\n')

From d9aafe3675dedba89bac35090c6411fcbeefffeb Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 10:18:22 -0500
Subject: [PATCH 10/42] 100, not 1000

---
 demo/mpi_hab_out.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index da6c9a3..b52aa9a 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -217,7 +217,7 @@ set_driver_url(driver_url)
 run_name = paste0(mpirank)
 
 ##1980-1999
-runsplits = split(1:length(to_run), floor(1:length(to_run)/1e3))
+runsplits = split(1:length(to_run), floor(1:length(to_run)/1e2))
 
 yeargroups = list(1980:1999, 2020:2039, 2080:2099)
 for(ygroup in yeargroups){
From 5ae34f00a4825aaca53f96a05e8d12d988d581ae Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 17:35:42 -0500
Subject: [PATCH 11/42] fix file path typo

---
 demo/mpi_hab_out.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index b52aa9a..3b47dc7 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -171,7 +171,7 @@ wrapup_output = function(out, out_dir, years){
     model_config = lapply(good_data, function(x){x$nml})
 
     notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
-    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file))
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
@@ -228,7 +228,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, system.file(out_dir, run_name), years=ygroup)
+    wrapup_output(out, file.path(out_dir, run_name), years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
     cat('on to the next\n')

From 3718d4440acddc660e888d4627d7d476db067c62 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 17:36:13 -0500
Subject: [PATCH 12/42] version increment

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5aebf92..d30a7d9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.7
+Version: 4.2.8
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow
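Patch 11's actual fix: because the notaro file is named per year range, its append/header logic has to follow that file's own existence rather than the run directory's. The general pattern, pulled out as a standalone helper (write_appending is a hypothetical name, not in the package):

    write_appending = function(df, path){
        write.table(df, path, sep='\t', row.names=FALSE,
                    append=file.exists(path), col.names=!file.exists(path))
    }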
From 77a5891118cfa791e4f10b696639420d552c1a64 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 5 Aug 2016 15:52:07 -0500
Subject: [PATCH 13/42] add cal data output to model runs

---
 demo/mpi_hab_out.R | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index b52aa9a..c7b77f8 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -74,6 +74,17 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
     #rename for dplyr
     nhd_id = site_id
 
+    #prep observations for calibration data
+    data(wtemp)
+    obs = filter(wtemp, site_id == nhd_id) %>%
+        transmute(DateTime=date, Depth=depth, temp=wtemp)
+
+    #having a weird issue with resample_to_field, make unique
+    obs = obs[!duplicated(obs[,1:2]), ]
+
+    write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+
+
     #get driver data
     driver_path = driver_function(site_id)
     driver_path = gsub('\\\\', '/', driver_path)
@@ -114,6 +125,8 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
+    cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
+    cal_data$site_id = site_id
 
     unlink(run_dir, recursive=TRUE)
@@ -123,7 +136,8 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
                 hansen_habitat=hansen_habitat,
                 site_id=site_id,
                 notaro_metrics=notaro_metrics,
-                nml=nml)
+                nml=nml,
+                cal_data=cal_data)
 
     return(all_data)
@@ -168,12 +182,15 @@ wrapup_output = function(out, out_dir, years){
 
     notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
 
+    cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]}))
+
     model_config = lapply(good_data, function(x){x$nml})
 
     notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
     write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
 
     save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
@@ -207,11 +224,6 @@ out_dir = file.path(config$outdir, driver_name)
 
 to_run = as.character(unique(zmax$site_id))
 to_run = split(to_run, cut(seq_along(to_run), mpisize, labels = FALSE))[[mpirank+1]]
 
-#clusterExport(c1, 'driver_fun')
-#clusterExport(c1, 'secchi_standard')
-#clusterExport(c1, 'driver_name')
-#clusterExport(c1, 'driver_url')
-#clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})
 set_driver_url(driver_url)
 
 run_name = paste0(mpirank)
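Patch 13's de-duplication step matters because, per the in-code comment, resample_to_field() misbehaves when the same DateTime/Depth pair shows up twice. The dedupe on a toy observation table:

    obs = data.frame(DateTime=as.POSIXct(c('2000-07-01', '2000-07-01', '2000-07-02')),
                     Depth=c(1, 1, 1),
                     temp=c(22.0, 22.4, 21.8))
    obs = obs[!duplicated(obs[, 1:2]), ]
    ## rows 1 and 3 survive; the repeated 2000-07-01 @ 1 m reading is dropped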
From e1f3e034ad81bc36d3c747d4ff6894ed71420a0b Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 5 Aug 2016 16:06:24 -0500
Subject: [PATCH 14/42] add ability to combine output data on cluster

---
 NAMESPACE                  |   1 +
 R/combine_output_data.R    | 104 +++++++++++++++++++++++++++++++++++++
 demo/combine_output_data.R |  77 ---------------------------
 demo/mpi_output_data.R     |  12 +++++
 man/combine_output_data.Rd |  13 +++++
 5 files changed, 130 insertions(+), 77 deletions(-)
 create mode 100644 R/combine_output_data.R
 delete mode 100644 demo/combine_output_data.R
 create mode 100644 demo/mpi_output_data.R
 create mode 100644 man/combine_output_data.Rd

diff --git a/NAMESPACE b/NAMESPACE
index edbbd3b..b279342 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,6 +9,7 @@ export(calc_stratified_periods)
 export(calc_toha)
 export(calc_toha_stats)
 export(comb_output_table)
+export(combine_output_data)
 export(create_irr_day_cycle)
 export(driver_add_burnin_years)
 export(driver_add_rain)

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
new file mode 100644
index 0000000..a112bc3
--- /dev/null
+++ b/R/combine_output_data.R
@@ -0,0 +1,104 @@
+#library(mda.lakes)
+#library(sbtools)
+#library(jsonlite)
+
+#' @title combine full sim run output data
+#'
+#' @description Combines all the individual compute node model files into
+#' a few files for the whole simulation
+#'
+#'
+#'
+#' @export
+combine_output_data = function(sim, path){
+    #fast temp location for bundling wtr out data
+    #this needs about 60GB of scratch space available
+    fast_tmp = tempdir()
+
+    core_path = paste0(path, sim, '/', sim, '_core_metrics.tsv')
+    cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
+    hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')
+
+    core_metrics = comb_output_table(paste0(path, sim, '/*/best_core_metrics.tsv'),
+                                     sep='\t', header=TRUE, as.is=TRUE)
+
+    write.table(core_metrics, core_path,
+                sep='\t', row.names=FALSE)
+
+    hab_metrics = comb_output_table(paste0(path, sim, '/*/best_hansen_hab.tsv'),
+                                    sep='\t', header=TRUE, as.is=TRUE)
+    write.table(hab_metrics, hansen_path,
+                sep='\t', row.names=FALSE)
+
+    nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
+
+    ###read and handle NML files
+    all_nml = list()
+    for(i in 1:length(nml_files)){
+        load(nml_files[i])
+        all_nml = c(all_nml, model_config)
+    }
+
+    all_nml = lapply(all_nml, function(x){class(x)='list'; x})
+    #save('all_nml', file = paste0(path, sim, '/model_config.Rdata'))
+    writeLines(toJSON(all_nml), cfg_path)
+
+    ###read and handle raw water temp data.
+    wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*'))
+
+    all_wtr_files = c()
+    wtemp_dir = file.path(fast_tmp, sim)
+    dir.create(wtemp_dir)
+
+    for(i in 1:length(wtr_files)){
+        load(wtr_files[i])
+
+        newfiles = lapply(dframes, function(df){
+            site_id = df$site_id[1]
+            df$site_id = NULL
+            wtemp_path = paste0(wtemp_dir, '/', sim, '_', site_id, '.tsv')
+
+            #the future sim periods were done separately, so they need to be appended
+            if(wtemp_path %in% all_wtr_files){
+                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
+            }else{
+                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
+            }
+
+            return(wtemp_path)
+        })
+
+        all_wtr_files = c(all_wtr_files, newfiles)
+    }
+
+    #split up files into 1000 lake groups
+    all_wtr_files = sort(unique(unlist(all_wtr_files)))
+    splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
+
+    wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip'))
+
+    #write an index file for later users
+    wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
+        data.frame(file_index=rep(basename(wtemp_zips)[i], length(splits[[i]])),
+                   file_name=basename(all_wtr_files)[splits[[i]]])
+    }))
+    wtemp_indx = file.path(path, sim, paste0(sim, '_wtemp_index.tsv'))
+    write.table(wtemp_zip_index, wtemp_indx, sep='\t', row.names=FALSE)
+
+
+    for(i in 1:length(splits)){
+        zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+    }
+    #delete raw text files to save space
+    unlink(all_wtr_files)
+
+    # #upload files to SB when done
+    # authenticate_sb(user, pass)
+    # itm_title = paste0('Simulated lake temperatures for ', sim, ' future projections')
+    # sim_itm = item_create(parent_id=sb_itm_root, title=itm_title)
+    #
+    # item_append_files(sim_itm, files=c(core_path, cfg_path, hansen_path, wtemp_zips, wtemp_indx))
+    print(c(core_path, cfg_path, hansen_path, wtemp_zips, wtemp_indx))
+    return(c(core_path, cfg_path, hansen_path, wtemp_zips, wtemp_indx))
+}
+

diff --git a/demo/combine_output_data.R b/demo/combine_output_data.R
deleted file mode 100644
index a3d6091..0000000
--- a/demo/combine_output_data.R
+++ /dev/null
@@ -1,77 +0,0 @@
-library(mda.lakes)
-library(sbtools)
-library(jsonlite)
-
-combine_output_data = function(sim, path, sb_itm_root, user, pass){
-
-    core_path = paste0(path, sim, '/best_core_metrics.tsv')
-    cfg_path = paste0(path, sim, "/model_config.json")
-    hansen_path = paste0(path, sim, '/best_hansen_hab.tsv')
-
-    core_metrics = comb_output_table(paste0(path, sim, '/*/best_core_metrics.tsv'),
-                                     sep='\t', header=TRUE, as.is=TRUE)
-
-    write.table(core_metrics, core_path,
-                sep='\t', row.names=FALSE)
-
-    hab_metrics = comb_output_table(paste0(path, sim, '/*/best_hansen_hab.tsv'),
-                                    sep='\t', header=TRUE, as.is=TRUE)
-    write.table(hab_metrics, hansen_path,
-                sep='\t', row.names=FALSE)
-
-    nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
-
-    ###read and handle NML files
-    all_nml = list()
-    for(i in 1:length(nml_files)){
-        load(nml_files[i])
-        all_nml = c(all_nml, model_config)
-    }
-
-    all_nml = lapply(all_nml, function(x){class(x)='list'; x})
-    #save('all_nml', file = paste0(path, sim, '/model_config.Rdata'))
-    writeLines(toJSON(all_nml), cfg_path)
-
-    ###read and handle raw water temp data.
-    # wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*'))
-    #
-    # all_wtr_files = c()
-    # wtemp_dir = file.path(path, sim, 'wtemp')
-    # dir.create(wtemp_dir)
-    #
-    # for(i in 1:length(wtr_files)){
-    #   load(wtr_files[i])
-    #
-    #   newfiles = lapply(dframes, function(df){
-    #       site_id = df$site_id[1]
-    #       df$site_id = NULL
-    #       wtemp_path = paste0(wtemp_dir, '/', site_id, '.tsv')
-    #
-    #       #the future sim periods were done separately, so they need to be appended
-    #       if(wtemp_path %in% all_wtr_files){
-    #           write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
-    #       }else{
-    #           write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
-    #       }
-    #
-    #       return(wtemp_path)
-    #   })
-    #
-    #   all_wtr_files = c(all_wtr_files, newfiles)
-    #   invisible(sbtools::query_sb_text('necsc', limit=1))
-    # }
-    #
-    # all_wtr_files = unique(unlist(all_wtr_files))
-    # wtemp_zip = file.path(path, sim, 'wtemp.zip')
-    # splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/100))
-    # for(i in 1:length(splits)){
-    #   zip(zipfile=wtemp_zip, files=all_wtr_files[splits[[i]]], flags='-j -r9X')
-    # }
-
-    authenticate_sb(user, pass)
-    itm_title = paste0('Simulated lake temperatures for ', sim, ' future projections')
-    sim_itm = item_create(parent_id=sb_itm_root, title=itm_title)
-
-    item_append_files(sim_itm, files=c(core_path, cfg_path, hansen_path))#, wtemp_zip))
-}
-

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
new file mode 100644
index 0000000..a435800
--- /dev/null
+++ b/demo/mpi_output_data.R
@@ -0,0 +1,12 @@
+library(mda.lakes)
+
+
+config = read.table('config', header=TRUE, as.is=TRUE)
+
+driver_name = config$drivername
+driver_url = config$driverurl
+out_dir = file.path(config$outdir, driver_name)
+
+
+combine_output_data(driver_name, out_dir)
+

diff --git a/man/combine_output_data.Rd b/man/combine_output_data.Rd
new file mode 100644
index 0000000..d0eb538
--- /dev/null
+++ b/man/combine_output_data.Rd
@@ -0,0 +1,13 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/combine_output_data.R
+\name{combine_output_data}
+\alias{combine_output_data}
+\title{combine full sim run output data}
+\usage{
+combine_output_data(sim, path)
+}
+\description{
+Combines all the individual compute node model files into
+a few files for the whole simulation
+}
+
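combine_output_data() zips the per-lake tsv files in groups and writes a zip-to-file index so users can find a lake without unpacking everything. The indexing logic in isolation (toy file names; note the groups are 500 files each, despite the '1000 lake groups' comment):

    all_wtr_files = sprintf('sim_lake%04d.tsv', 1:1200)
    splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
    wtemp_zips = paste0('sim_wtemp_', seq_along(splits), '.zip')
    wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
        data.frame(file_index=rep(wtemp_zips[i], length(splits[[i]])),
                   file_name=all_wtr_files[splits[[i]]])
    }))
    ## 1200 rows mapping each tsv to one of three zips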
From ecfa7946bf4048060075887c30ec9415febe6a66 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 5 Aug 2016 16:09:08 -0500
Subject: [PATCH 15/42] version increment

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5aebf92..d30a7d9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.7
+Version: 4.2.8
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

From fc12cc86192e889451ee95c64a0f70e9854ca106 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Wed, 10 Aug 2016 11:26:29 -0500
Subject: [PATCH 16/42] better handle calibration when no cal data available

---
 DESCRIPTION        |  2 +-
 demo/mpi_hab_out.R | 26 +++++++++++++++++-------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d30a7d9..fd00be3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.8
+Version: 4.2.9
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 12a25c4..3578442 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -46,12 +46,13 @@ Sys.setenv(TZ='GMT')
 
 # clusterEvalQ(c1, Sys.setenv(TZ='GMT'))
 
-future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){
+future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){
 
     library(lakeattributes)
     library(mda.lakes)
     library(dplyr)
     library(glmtools)
+    library(lubridate)
 
     fastdir = tempdir()
     #for use on WiWSC Condor pool
@@ -77,12 +78,17 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri
     #prep observations for calibration data
     data(wtemp)
     obs = filter(wtemp, site_id == nhd_id) %>%
-        transmute(DateTime=date, Depth=depth, temp=wtemp)
+        transmute(DateTime=date, Depth=depth, temp=wtemp) %>%
+        filter(year(DateTime) %in% modern_era)
 
-    #having a weird issue with resample_to_field, make unique
-    obs = obs[!duplicated(obs[,1:2]), ]
+    have_cal = nrow(obs) > 0
 
-    write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+    if(have_cal){
+        #having a weird issue with resample_to_field, make unique
+        obs = obs[!duplicated(obs[,1:2]), ]
+
+        write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+    }
 
 
     #get driver data
@@ -125,8 +131,14 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
-    cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
-    cal_data$site_id = site_id
+    if(have_cal){
+        cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
+        cal_data$site_id = site_id
+        cat('Calibration data calculated\n')
+    }else{
+        cal_data = data.frame() #just use empty data frame if no cal data
+        cat('No Cal, calibration skipped\n')
+    }
 
     unlink(run_dir, recursive=TRUE)
From 8910189f84184bb7558c2740158dfca16c80e309 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 10:30:12 -0500
Subject: [PATCH 17/42] fixup final processing code

---
 DESCRIPTION             | 2 +-
 R/combine_output_data.R | 5 +++++
 demo/mpi_output_data.R  | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fd00be3..8026921 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.9
+Version: 4.2.10
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index a112bc3..3a54aa6 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -15,6 +15,11 @@ combine_output_data = function(sim, path){
     #this needs about 60GB of scratch space available
     fast_tmp = tempdir()
 
+    #ensure we have a trailing / on path
+    if(!substr(path, nchar(path), nchar(path)) == '/'){
+        path = paste0(path, '/')
+    }
+
     core_path = paste0(path, sim, '/', sim, '_core_metrics.tsv')
     cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
     hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index a435800..c82c325 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -5,7 +5,7 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 
 driver_name = config$drivername
 driver_url = config$driverurl
-out_dir = file.path(config$outdir, driver_name)
+out_dir = config$outdir
 
 
 combine_output_data(driver_name, out_dir)

From c14ed27820b4d0da3541043f12f92b28c974ae59 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 11:02:53 -0500
Subject: [PATCH 18/42] fix jsonlite import

---
 DESCRIPTION             | 2 +-
 NAMESPACE               | 1 +
 R/combine_output_data.R | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8026921..f10a61a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.10
+Version: 4.2.11
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/NAMESPACE b/NAMESPACE
index b279342..b465fe4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -47,6 +47,7 @@ export(set_driver_url)
 export(summarize_notaro)
 import(GLMr)
 import(glmtools)
+import(jsonlite)
 import(lakeattributes)
 import(lubridate)
 import(rLakeAnalyzer)

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 3a54aa6..9bd1fb4 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -7,6 +7,7 @@
 #' @description Combines all the individual compute node model files into
 #' a few files for the whole simulation
 #'
+#' @import jsonlite
 #'
 #'
 #' @export
From 47bde1f99c61c178b255592ccbc9b283a4a129a6 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 11:32:16 -0500
Subject: [PATCH 19/42] fix tempfile location

---
 DESCRIPTION             |  2 +-
 R/combine_output_data.R | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f10a61a..4f9d173 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.11
+Version: 4.2.12
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 9bd1fb4..5456fc2 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -14,7 +14,16 @@ combine_output_data = function(sim, path){
     #fast temp location for bundling wtr out data
     #this needs about 60GB of scratch space available
-    fast_tmp = tempdir()
+    #if ram_scratch is there, use it
+    fast_tmp = Sys.getenv('RAM_SCRATCH', unset = '')
+
+    if(fast_tmp == ''){
+        if(file.exists('/cxfs/scratch')){
+            fast_tmp = '/cxfs/scratch'
+        }else{
+            fast_tmp = tempdir()
+        }
+    }
 
     #ensure we have a trailing / on path
     if(!substr(path, nchar(path), nchar(path)) == '/'){
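Patch 19's lookup falls back through an environment variable, a known cluster path, and finally tempdir(). The same precedence chain, condensed:

    fast_tmp = Sys.getenv('RAM_SCRATCH', unset='')
    if(fast_tmp == ''){
        fast_tmp = if(file.exists('/cxfs/scratch')) '/cxfs/scratch' else tempdir()
    }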
From 03bf4cd2683818e6efb14a8659a5a4d70c475e78 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 11:59:30 -0500
Subject: [PATCH 20/42] try for NLDAS with cal out

---
 DESCRIPTION              |   2 +-
 R/combine_output_data.R  | 115 ++++++++++++++++++++++++---------------
 demo/mpi_hab_out_nldas.R |  31 ++++++++++++++++++++++++++++++-
 3 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4f9d173..04b5913 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.12
+Version: 4.2.13
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 5456fc2..5567bd7 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -34,78 +34,95 @@ combine_output_data = function(sim, path){
     cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
     hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')
 
+    ################################################################################
+    ## read and handle core metrics
     core_metrics = comb_output_table(paste0(path, sim, '/*/best_core_metrics.tsv'),
                                      sep='\t', header=TRUE, as.is=TRUE)
 
     write.table(core_metrics, core_path,
                 sep='\t', row.names=FALSE)
 
+    ################################################################################
+    ## read and handle habitat metrics
     hab_metrics = comb_output_table(paste0(path, sim, '/*/best_hansen_hab.tsv'),
                                     sep='\t', header=TRUE, as.is=TRUE)
     write.table(hab_metrics, hansen_path,
                 sep='\t', row.names=FALSE)
 
+    ################################################################################
+    ###read and handle NML files
     nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
 
-    ###read and handle NML files
-    all_nml = list()
-    for(i in 1:length(nml_files)){
-        load(nml_files[i])
-        all_nml = c(all_nml, model_config)
+    if(length(nml_files) > 0){
+        cat('Wrapping up all nml config.\n')
+
+        all_nml = list()
+        for(i in 1:length(nml_files)){
+            load(nml_files[i])
+            all_nml = c(all_nml, model_config)
+        }
+
+        all_nml = lapply(all_nml, function(x){class(x)='list'; x})
+        writeLines(toJSON(all_nml), cfg_path)
+    }else{
+        cat('Skipping nml config.\n')
     }
 
-    all_nml = lapply(all_nml, function(x){class(x)='list'; x})
-    #save('all_nml', file = paste0(path, sim, '/model_config.Rdata'))
-    writeLines(toJSON(all_nml), cfg_path)
-
+    ################################################################################
     ###read and handle raw water temp data.
     wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*'))
 
-    all_wtr_files = c()
-    wtemp_dir = file.path(fast_tmp, sim)
-    dir.create(wtemp_dir)
-
-    for(i in 1:length(wtr_files)){
-        load(wtr_files[i])
+    if(length(wtr_files) > 0){
+        cat('Wrapping up all raw wtr data.\n')
+        all_wtr_files = c()
+        wtemp_dir = file.path(fast_tmp, sim)
+        dir.create(wtemp_dir)
 
-        newfiles = lapply(dframes, function(df){
-            site_id = df$site_id[1]
-            df$site_id = NULL
-            wtemp_path = paste0(wtemp_dir, '/', sim, '_', site_id, '.tsv')
+        for(i in 1:length(wtr_files)){
+            load(wtr_files[i])
 
-            #the future sim periods were done separately, so they need to be appended
-            if(wtemp_path %in% all_wtr_files){
-                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
-            }else{
-                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
-            }
+            newfiles = lapply(dframes, function(df){
+                site_id = df$site_id[1]
+                df$site_id = NULL
+                wtemp_path = paste0(wtemp_dir, '/', sim, '_', site_id, '.tsv')
+
+                #the future sim periods were done separately, so they need to be appended
+                if(wtemp_path %in% all_wtr_files){
+                    write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
+                }else{
+                    write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
+                }
+
+                return(wtemp_path)
+            })
 
-            return(wtemp_path)
-        })
+            all_wtr_files = c(all_wtr_files, newfiles)
+        }
 
-        all_wtr_files = c(all_wtr_files, newfiles)
-    }
-
-    #split up files into 1000 lake groups
-    all_wtr_files = sort(unique(unlist(all_wtr_files)))
-    splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
-
-    wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip'))
-
-    #write an index file for later users
-    wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
-        data.frame(file_index=rep(basename(wtemp_zips)[i], length(splits[[i]])),
-                   file_name=basename(all_wtr_files)[splits[[i]]])
-    }))
-    wtemp_indx = file.path(path, sim, paste0(sim, '_wtemp_index.tsv'))
-    write.table(wtemp_zip_index, wtemp_indx, sep='\t', row.names=FALSE)
-
-
-    for(i in 1:length(splits)){
-        zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+        #split up files into 1000 lake groups
+        all_wtr_files = sort(unique(unlist(all_wtr_files)))
+        splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
+
+        wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip'))
+
+        #write an index file for later users
+        wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
+            data.frame(file_index=rep(basename(wtemp_zips)[i], length(splits[[i]])),
+                       file_name=basename(all_wtr_files)[splits[[i]]])
+        }))
+        wtemp_indx = file.path(path, sim, paste0(sim, '_wtemp_index.tsv'))
+        write.table(wtemp_zip_index, wtemp_indx, sep='\t', row.names=FALSE)
+
+
+        for(i in 1:length(splits)){
+            zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+        }
+        #delete raw text files to save space
+        unlink(all_wtr_files)
+
+    }else{
+        cat('Skipping raw wtr data.\n')
     }
-    #delete raw text files to save space
-    unlink(all_wtr_files)
 
     # #upload files to SB when done
     # authenticate_sb(user, pass)

diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R
index be32522..72d040c 100644
--- a/demo/mpi_hab_out_nldas.R
+++ b/demo/mpi_hab_out_nldas.R
@@ -74,6 +74,21 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
     #rename for dplyr
     nhd_id = site_id
 
+    #prep observations for calibration data
+    data(wtemp)
+    obs = filter(wtemp, site_id == nhd_id) %>%
+        transmute(DateTime=date, Depth=depth, temp=wtemp) %>%
+        filter(year(DateTime) %in% modern_era)
+
+    have_cal = nrow(obs) > 0
+
+    if(have_cal){
+        #having a weird issue with resample_to_field, make unique
+        obs = obs[!duplicated(obs[,1:2]), ]
+
+        write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+    }
+
     #get driver data
     driver_path = driver_function(site_id)
     driver_path = gsub('\\\\', '/', driver_path)
@@ -114,6 +129,15 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
+    ## if we have cal, use it
+    if(have_cal){
+        cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
+        cal_data$site_id = site_id
+        cat('Calibration data calculated\n')
+    }else{
+        cal_data = data.frame() #just use empty data frame if no cal data
+        cat('No Cal, calibration skipped\n')
+    }
 
     unlink(run_dir, recursive=TRUE)
@@ -123,7 +147,8 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
                 hansen_habitat=hansen_habitat,
                 site_id=site_id,
                 #notaro_metrics=notaro_metrics,
-                nml=nml)
+                nml=nml,
+                cal_data=cal_data)
 
     return(all_data)
@@ -173,13 +198,13 @@ wrapup_output = function(out, run_name, years){
     core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
     core_metrics = subset(core_metrics, year %in% years)
 
-    #notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+    cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]}))
 
     model_config = lapply(good_data, function(x){x$nml})
 
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-    #write.table(notaro_metrics, file.path(out_dir, 'notaro_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
 
     save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
     save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
From 759230f8d89a682b49317279f886c5bd23003c6d Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 12 Aug 2016 09:34:30 -0500
Subject: [PATCH 21/42] add cal wrapup functionality

---
 DESCRIPTION             |  2 +-
 R/combine_output_data.R | 18 ++++++++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 04b5913..3d0a7ab 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.13
+Version: 4.2.14
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 5567bd7..8d92855 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -30,9 +30,10 @@ combine_output_data = function(sim, path){
         path = paste0(path, '/')
     }
 
-    core_path = paste0(path, sim, '/', sim, '_core_metrics.tsv')
-    cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
+    core_path   = paste0(path, sim, '/', sim, '_core_metrics.tsv')
+    cfg_path    = paste0(path, sim, '/', sim, '_model_config.json')
     hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')
+    cal_path    = paste0(path, sim, '/', sim, '_calibration_data.tsv')
 
     ################################################################################
     ## read and handle core metrics
@@ -49,6 +50,19 @@ combine_output_data = function(sim, path){
     write.table(hab_metrics, hansen_path,
                 sep='\t', row.names=FALSE)
 
+
+    ################################################################################
+    ## read and handle calibration data
+    if(length(Sys.glob(paste0(path, sim, '/*/best_cal_data.tsv')))){
+        cat('Cal wrapup running.\n')
+        cal_data = comb_output_table(paste0(path, sim, '/*/best_cal_data.tsv'),
+                                     sep='\t', header=TRUE, as.is=TRUE)
+        write.table(cal_data, cal_path,
+                    sep='\t', row.names=FALSE)
+    }else{
+        cat('Skipping cal wrapup because no cal data.\n')
+    }
+
     ################################################################################
     ###read and handle NML files
     nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
From 6c7072d0c770f73062ae8d84fea28aa2da85d772 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 12:07:50 -0500
Subject: [PATCH 22/42] change scratch dir

---
 DESCRIPTION             |  2 +-
 R/combine_output_data.R | 13 +------------
 demo/mpi_output_data.R  | 20 +++++++++++++++++++-
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 3d0a7ab..b981048 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.14
+Version: 4.2.15
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 8d92855..902cb0e 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -11,19 +11,8 @@
 #'
 #'
 #' @export
-combine_output_data = function(sim, path){
-    #fast temp location for bundling wtr out data
-    #this needs about 60GB of scratch space available
-    #if ram_scratch is there, use it
-    fast_tmp = Sys.getenv('RAM_SCRATCH', unset = '')
+combine_output_data = function(sim, path, fast_tmp=tempdir()){
 
-    if(fast_tmp == ''){
-        if(file.exists('/cxfs/scratch')){
-            fast_tmp = '/cxfs/scratch'
-        }else{
-            fast_tmp = tempdir()
-        }
-    }
 
     #ensure we have a trailing / on path
     if(!substr(path, nchar(path), nchar(path)) == '/'){

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index c82c325..4ffd72d 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -6,7 +6,25 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 driver_name = config$drivername
 driver_url = config$driverurl
 out_dir = config$outdir
+scratch_dir = config$scratch
+
+#scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
+dir.create(scratch_dir)
+
+combine_output_data(driver_name, out_dir, fast_tmp=scratch_dir)
+
+# #fast temp location for bundling wtr out data
+# #this needs about 60GB of scratch space available
+# #if ram_scratch is there, use it
+# fast_tmp = Sys.getenv('RAM_SCRATCH', unset = '')
+#
+# if(fast_tmp == ''){
+#     if(file.exists('/cxfs/scratch')){
+#         fast_tmp = '/cxfs/scratch'
+#     }else{
+#         fast_tmp = tempdir()
+#     }
+# }
 
-combine_output_data(driver_name, out_dir)
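With patch 22 the scratch location becomes a caller decision via the new fast_tmp argument (default tempdir()). An example call, with purely illustrative paths:

    combine_output_data('GFDL', '/projects/lakes/output/', fast_tmp='/cxfs/scratch/tmp')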
From d1fa53f161628c124842636d9e6a1c44aa824ec6 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 12:21:43 -0500
Subject: [PATCH 23/42] fix issue with zero length files

---
 DESCRIPTION           | 2 +-
 R/comb_output_table.R | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b981048..b05880a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.15
+Version: 4.2.16
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/comb_output_table.R b/R/comb_output_table.R
index 70888b8..25998ca 100644
--- a/R/comb_output_table.R
+++ b/R/comb_output_table.R
@@ -20,8 +20,10 @@ comb_output_table = function(pattern, ...){
     out = data.frame()
 
     for(i in 1:length(files)){
-        tmp = read.table(files[i], ...)
-        out = rbind(out, tmp)
+        if(file.info(files[i])$size > 0){
+            tmp = read.table(files[i], ...)
+            out = rbind(out, tmp)
+        }
     }
 
     return(out)
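The size check in patch 23 guards against what is likely a side effect of patch 16's empty cal_data frames: a node whose results held no rows writes a zero-byte tsv, and read.table() errors on a zero-length file instead of returning an empty table. The same guard, standalone:

    files = Sys.glob('*/best_cal_data.tsv')
    nonempty = files[file.info(files)$size > 0]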
From 5933e558a1b8fdd4b38cbdc73f62a80b039ff3ce Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 12:31:11 -0500
Subject: [PATCH 24/42] scratch scratch scratch

---
 DESCRIPTION            | 2 +-
 demo/mpi_output_data.R | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b05880a..206e1b7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.16
+Version: 4.2.17
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index 4ffd72d..d1587a5 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -6,9 +6,12 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 driver_name = config$drivername
 driver_url = config$driverurl
 out_dir = config$outdir
-scratch_dir = config$scratch
 
-#scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
+scratch_dir = Sys.getenv('GLOBAL_SCRATCH', unset=config$scratch)
+if(is.null(scratch_dir) || scratch_dir==''){
+    scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
+}
+
 dir.create(scratch_dir)
 
 combine_output_data(driver_name, out_dir, fast_tmp=scratch_dir)

From 8d993cfa0301ca4a282d551a2a843721a86e0494 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 14:35:32 -0500
Subject: [PATCH 25/42] fix NLDAS data handling

---
 R/combine_output_data.R  | 2 +-
 demo/mpi_hab_out_nldas.R | 1 +
 demo/mpi_output_data.R   | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 902cb0e..311f113 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -118,7 +118,7 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){
 
         for(i in 1:length(splits)){
-            zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+            zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]], zip='zip')
         }
         #delete raw text files to save space
         unlink(all_wtr_files)

diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R
index 72d040c..f4f2e06 100644
--- a/demo/mpi_hab_out_nldas.R
+++ b/demo/mpi_hab_out_nldas.R
@@ -52,6 +52,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
     library(mda.lakes)
     library(dplyr)
     library(glmtools)
+    library(lubridate)
 
     fastdir = tempdir()
     #for use on WiWSC Condor pool

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index d1587a5..6dbb6b3 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -8,6 +8,7 @@ driver_url = config$driverurl
 out_dir = config$outdir
 
 scratch_dir = Sys.getenv('GLOBAL_SCRATCH', unset=config$scratch)
+#base case just in case
 if(is.null(scratch_dir) || scratch_dir==''){
     scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
 }
Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 15 Aug 2016 16:32:22 -0500 Subject: [PATCH 26/42] add more logging to sim runs --- DESCRIPTION | 2 +- demo/mpi_hab_out.R | 7 +++++-- demo/mpi_hab_out_nldas.R | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 206e1b7..567f159 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.17 +Version: 4.2.18 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index 3578442..221e9c0 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -69,7 +69,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) - cat(run_dir, '\n') + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') dir.create(run_dir) #rename for dplyr @@ -151,10 +151,13 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri nml=nml, cal_data=cal_data) + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(all_data) }, error=function(e){ - unlink(run_dir, recursive=TRUE); + unlink(run_dir, recursive=TRUE) + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') return(list(error=e, site_id)) }) } diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index f4f2e06..f734ce5 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -69,7 +69,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) - cat(run_dir, '\n') + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') dir.create(run_dir) #rename for dplyr @@ -151,10 +151,13 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func nml=nml, cal_data=cal_data) + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(all_data) }, error=function(e){ unlink(run_dir, recursive=TRUE); + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') return(list(error=e, site_id)) }) } From 963e96afcaeca858a7a81665edbb6b48cb4dac61 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:35:09 -0500 Subject: [PATCH 27/42] try different MPI style --- demo/mpi_hab_out.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index 221e9c0..2eefc77 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -17,11 +17,15 @@ # mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_4.1.0.tar.gz'))}) -library(Rmpi) +#library(Rmpi) -args = commandArgs(trailingOnly=TRUE) -mpirank = mpi.comm.rank(0) -mpisize = mpi.comm.size(0) +#args = commandArgs(trailingOnly=TRUE) +mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) +mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) + +if(mpirank == 'NA' || mpisize == 'NA'){ + stop('trouble finding MPIRANK or MPISIZE') +} # if(mpi.comm.rank(0) != 0){ From 774752a2c822cf40e9568a6aa0871a7b3ef91f22 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:35:46 -0500 Subject: [PATCH 28/42] for 
NLDAS --- demo/mpi_hab_out_nldas.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index f734ce5..be272b6 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -17,11 +17,15 @@ # mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_4.1.0.tar.gz'))}) -library(Rmpi) +#library(Rmpi) -args = commandArgs(trailingOnly=TRUE) -mpirank = mpi.comm.rank(0) -mpisize = mpi.comm.size(0) +#args = commandArgs(trailingOnly=TRUE) +mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) +mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) + +if(mpirank == 'NA' || mpisize == 'NA'){ + stop('trouble finding MPIRANK or MPISIZE') +} # if(mpi.comm.rank(0) != 0){ From 37a8c1d360d84aeec824494c765fb45270964d22 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:42:14 -0500 Subject: [PATCH 29/42] fix numeric type --- demo/mpi_hab_out.R | 6 +++--- demo/mpi_hab_out_nldas.R | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index 2eefc77..b7face6 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -20,10 +20,10 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) -mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) +mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) -if(mpirank == 'NA' || mpisize == 'NA'){ +if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') } diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index be272b6..ed20a21 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -20,10 +20,10 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) -mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) +mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) -if(mpirank == 'NA' || mpisize == 'NA'){ +if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') } From 7f7ab383dde7bfb7a644f7b74717664ff0b040e0 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:45:58 -0500 Subject: [PATCH 30/42] for NLDAS --- demo/mpi_hab_out.R | 2 ++ demo/mpi_hab_out_nldas.R | 2 ++ 2 files changed, 4 insertions(+) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index b7face6..ba995bf 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -22,6 +22,8 @@ #args = commandArgs(trailingOnly=TRUE) mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +cat('MPIRANK:', mpirank, '\n') +cat('MPISIZE:', mpisize, '\n') if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index ed20a21..7fb0ebe 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -22,6 +22,8 @@ #args = commandArgs(trailingOnly=TRUE) mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +cat('MPIRANK:', mpirank, '\n') +cat('MPISIZE:', mpisize, '\n') 
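# Patches 27-28 above swap Rmpi for SLURM environment variables; patch 31
# below fixes the coercion (as.numeric, not is.numeric). For reference, a
# minimal standalone sketch of the pattern the series lands on
# (slurm_rank_size() is an illustrative helper, not part of mda.lakes; it
# assumes srun sets SLURM_PROCID and SLURM_STEP_NUM_TASKS):
slurm_rank_size = function(){
  # as.numeric() turns the unset sentinel 'NA' into NA (with a coercion warning)
  r = suppressWarnings(as.numeric(Sys.getenv('SLURM_PROCID', 'NA')))
  s = suppressWarnings(as.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')))
  if(is.na(r) || is.na(s)) stop('trouble finding MPIRANK or MPISIZE')
  list(rank = r, size = s)  # rank is 0-based; size is the task count in the step
}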
if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') From cc389bb48112b87cdd3462ea7dc94156f5c80317 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:48:59 -0500 Subject: [PATCH 31/42] fix usage of is.numeric --- demo/mpi_hab_out.R | 4 ++-- demo/mpi_hab_out_nldas.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index ba995bf..b8d19e5 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -20,8 +20,8 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) -mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +mpirank = as.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = as.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) cat('MPIRANK:', mpirank, '\n') cat('MPISIZE:', mpisize, '\n') diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index 7fb0ebe..96994c8 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -20,8 +20,8 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) -mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +mpirank = as.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = as.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) cat('MPIRANK:', mpirank, '\n') cat('MPISIZE:', mpisize, '\n') From 061c45384994055b4923b8e079f58edaeaea3f5d Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 25 Aug 2016 14:30:07 -0500 Subject: [PATCH 32/42] new job running script --- demo/slurm_array_hab_out_god_mode.R | 259 ++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 demo/slurm_array_hab_out_god_mode.R diff --git a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R new file mode 100644 index 0000000..0f99553 --- /dev/null +++ b/demo/slurm_array_hab_out_god_mode.R @@ -0,0 +1,259 @@ +#slurm_array_hab_out_god_mode +## This uses SLURM array running to run individual lakes +## MPI has been too troublesome + +#first load some modules: +# module load tools/netcdf-4.3.2-gnu + + +library(lakeattributes) +library(mda.lakes) +library(dplyr) +library(glmtools) +source(system.file('demo/common_running_functions.R', package='mda.lakes')) + +Sys.setenv(TZ='GMT') + +# clusterEvalQ(c1, Sys.setenv(TZ='GMT')) + + +future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){ + + library(lakeattributes) + library(mda.lakes) + library(dplyr) + library(glmtools) + library(lubridate) + + fastdir = tempdir() + #for use on WiWSC Condor pool + if(file.exists('/mnt/ramdisk')){ + fastdir = '/mnt/ramdisk' + } + #for use on YETI + if(Sys.getenv('RAM_SCRATCH', unset = '') != ''){ + fastdir = Sys.getenv('RAM_SCRATCH', unset = '') + } + + + tryCatch({ + + run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + dir.create(run_dir) + + #rename for dplyr + nhd_id = site_id + + #prep observations for calibration data + data(wtemp) + obs = filter(wtemp, site_id == nhd_id) %>% + transmute(DateTime=date, Depth=depth, temp=wtemp) %>% + filter(year(DateTime) %in% modern_era) + + have_cal = nrow(obs) > 0 + + if(have_cal){ + #having a weird issue with 
resample_to_field, make unique + obs = obs[!duplicated(obs[,1:2]), ] + + write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE) + } + + + #get driver data + driver_path = driver_function(site_id) + driver_path = gsub('\\\\', '/', driver_path) + + + kd_avg = secchi_function(site_id) #secchi_conv/mean(kds$secchi_avg, na.rm=TRUE) + + #run with different driver and ice sources + + prep_run_glm_kd(site_id=site_id, + path=run_dir, + years=modern_era, + kd=kd_avg, + nml_args=c(list( + dt=3600, subdaily=FALSE, nsave=24, + timezone=-6, + csv_point_nlevs=0, + snow_albedo_factor=1.1, + meteo_fl=driver_path, + cd=getCD(site_id, method='Hondzo')), + nml_args)) + + + ##parse the habitat and WTR info. next run will clobber output.nc + wtr_all = get_temp(file.path(run_dir, 'output.nc'), reference='surface') + ## drop the first n burn-in years + #years = as.POSIXlt(wtr$DateTime)$year + 1900 + #to_keep = !(years <= min(years) + nburn - 1) + #wtr_all = wtr[to_keep, ] + + core_metrics = necsc_thermal_metrics_core(run_dir, site_id) + + hansen_habitat = hansen_habitat_calc(run_dir, site_id) + + notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc')) + + nml = read_nml(file.path(run_dir, "glm2.nml")) + + if(have_cal){ + cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv')) + cal_data$site_id = site_id + cat('Calibration data calculated\n') + }else{ + cal_data = data.frame() #just use empty data frame if no cal data + cat('No Cal, calibration skipped\n') + } + + unlink(run_dir, recursive=TRUE) + + notaro_metrics$site_id = site_id + + all_data = list(wtr=wtr_all, core_metrics=core_metrics, + hansen_habitat=hansen_habitat, + site_id=site_id, + notaro_metrics=notaro_metrics, + nml=nml, + cal_data=cal_data) + + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + + return(all_data) + + }, error=function(e){ + unlink(run_dir, recursive=TRUE) + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(list(error=e, site_id)) + }) +} + + + +getnext = function(fname){ + i=0 + barefname = fname + while(file.exists(fname)){ + i=i+1 + fname = paste0(barefname, '.', i) + } + return(fname) +} + +wrapup_output = function(out, out_dir, years){ + + run_exists = file.exists(out_dir) + + if(!run_exists) {dir.create(out_dir, recursive=TRUE)} + + good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + + sprintf('%i lakes ran\n', length(good_data)) + dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) + #drop the burn-in years + dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) + + hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) + hansen_habitat = subset(hansen_habitat, year %in% years) + + core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) + core_metrics = subset(core_metrics, year %in% years) + + notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) + + cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) + + model_config = lapply(good_data, function(x){x$nml}) + + notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) + write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE,
append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) + write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + + + save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) + save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) + save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + + rm(out, good_data, dframes) + gc() +} + + +################################################################################ +## Let's run downscaled climate runs 1981-2000, 2040-2059, 2080-2099 +################################################################################ +gcm_driver_fun = function(site_id, dname){ + drivers = read.csv(get_driver_path(paste0(site_id, ''), driver_name = dname, timestep = 'daily'), header=TRUE) + #nldas = read.csv(get_driver_path(paste0(site_id, ''), driver_name = 'NLDAS'), header=TRUE) + #drivers = driver_nldas_debias_airt_sw(drivers, nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + driver_save(drivers) +} + +nldas_driver_fun = function(site_id, dname){ + nldas = read.csv(get_driver_path(site_id, driver_name = dname), header=TRUE) + drivers = driver_nldas_wind_debias(nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + #fix the 2-day offset in NLDAS data + drivers$time = drivers$time + as.difftime(-2, units='days') + driver_save(drivers) +} + +#This should be 1 to n, where n may be larger than total number of lakes to model +task_id = as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID', 'NA')) + +if(is.na(task_id)){ + stop("ERROR Can not read task_id, NA returned") +} + +task_offset = as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID_offset', 'NA')) +if(is.na(task_offset)){ + task_offset = 0 +} +task_id = task_id + task_offset + +config = read.table('config', header=TRUE, as.is=TRUE) + +driver_name = config$drivername +driver_url = config$driverurl +out_dir = file.path(config$outdir, driver_name) +set_driver_url(driver_url) + +if(driver_name == 'NLDAS'){ + driver_fun = nldas_driver_fun + yeargroups = list(1979:2015) +}else{ + driver_fun = gcm_driver_fun + yeargroups = list(1981:2000, 2040:2059, 2080:2099) +} + +to_run = as.character(unique(zmax$site_id)) +if(task_id > length(to_run)){ + cat(sprintf('Skipping task_id:%i because greater than number of lakes to run\n', task_id)) + q(save='no', status=0) +} + +site_id = to_run[task_id] + + +for(ygroup in yeargroups){ + start = Sys.time() + out = lapply(site_id, future_hab_wtr, + modern_era=ygroup, + secchi_function=secchi_standard, + driver_function=function(site_id){driver_fun(site_id, driver_name)}) + + wrapup_output(out, file.path(out_dir, site_id), years=ygroup) + + print(difftime(Sys.time(), start, units='hours')) + cat('on to the next\n') +} + + From 05bb77f99a9e7c8841c524ae411fea78b8e6fb09 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 25 Aug 2016 15:23:20 -0500 Subject: [PATCH 33/42] change albedo multiplier --- demo/slurm_array_hab_out_god_mode.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R index 0f99553..5296f0b 100644 --- a/demo/slurm_array_hab_out_god_mode.R +++ b/demo/slurm_array_hab_out_god_mode.R @@ -78,7 +78,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri dt=3600, subdaily=FALSE, nsave=24, timezone=-6, csv_point_nlevs=0, - snow_albedo_factor=1.1, + snow_albedo_factor=1, meteo_fl=driver_path, cd=getCD(site_id, method='Hondzo')), nml_args)) From e9f332053d150b4421dbc3d941f3344fcc68244f Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 25 Aug 2016 15:24:26 -0500 Subject: [PATCH 34/42] version increment --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 567f159..d2c3c83 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.18 +Version: 4.2.19 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow From 1fa014378fad8a1285625cad3690246b70ebda5f Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Sat, 27 Aug 2016 09:04:05 -0500 Subject: [PATCH 35/42] change albedo factor to fix late off bias --- demo/slurm_array_hab_out_god_mode.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R index 5296f0b..297742a 100644 --- a/demo/slurm_array_hab_out_god_mode.R +++ b/demo/slurm_array_hab_out_god_mode.R @@ -78,7 +78,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri dt=3600, subdaily=FALSE, nsave=24, timezone=-6, csv_point_nlevs=0, - snow_albedo_factor=1, + snow_albedo_factor=0.85, meteo_fl=driver_path, cd=getCD(site_id, method='Hondzo')), nml_args)) From fa08b457acb5f7b84874a9a536e78cd2254c9767 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Sat, 27 Aug 2016 09:04:26 -0500 Subject: [PATCH 36/42] version increment --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2c3c83..f7bcd95 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.19 +Version: 4.2.20 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow From 3d3162afd84a5880ea8a8763e0bca4b0817a56e3 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 1 Sep 2016 16:07:59 -0500 Subject: [PATCH 37/42] increase size of zipfiles --- R/combine_output_data.R | 2 +- man/combine_output_data.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/combine_output_data.R b/R/combine_output_data.R index 311f113..2e3059e 100644 --- a/R/combine_output_data.R +++ b/R/combine_output_data.R @@ -104,7 +104,7 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){ #split up files into 1000 lake groups all_wtr_files = sort(unique(unlist(all_wtr_files))) - splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500)) + splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/1000)) wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip')) diff --git a/man/combine_output_data.Rd b/man/combine_output_data.Rd index d0eb538..7747dda 100644 --- a/man/combine_output_data.Rd +++ b/man/combine_output_data.Rd @@ -4,7 +4,7 @@ \alias{combine_output_data} \title{combine full sim run output data} \usage{ -combine_output_data(sim, path) 
+combine_output_data(sim, path, fast_tmp = tempdir()) } \description{ Combines all the individual compute node model files into From fef2d0118090fe2d1200fce178626528032d0db1 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Tue, 6 Sep 2016 13:31:45 -0500 Subject: [PATCH 38/42] sb archive and fix index dl --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ R/get_driver_nhd.R | 4 +++ R/sb_archive_model.R | 54 +++++++++++++++++++++++++++++++++++++++++ man/sb_archive_model.Rd | 21 ++++++++++++++++ 5 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 R/sb_archive_model.R create mode 100644 man/sb_archive_model.Rd diff --git a/DESCRIPTION b/DESCRIPTION index f7bcd95..ab134c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.20 +Version: 4.2.21 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/NAMESPACE b/NAMESPACE index b465fe4..b7f3777 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(necsc_thermal_metrics_core) export(opti_thermal_habitat) export(populate_base_lake_nml) export(prep_run_glm_kd) +export(sb_archive_model) export(sens_seasonal_site) export(set_driver_url) export(summarize_notaro) @@ -51,6 +52,7 @@ import(jsonlite) import(lakeattributes) import(lubridate) import(rLakeAnalyzer) +import(sbtools) import(tidyr) importFrom(accelerometry,rle2) importFrom(insol,JD) diff --git a/R/get_driver_nhd.R b/R/get_driver_nhd.R index 7af6fb8..6aee4a8 100644 --- a/R/get_driver_nhd.R +++ b/R/get_driver_nhd.R @@ -106,6 +106,10 @@ get_driver_index = function(driver_name, loc_cache=TRUE){ if(substr(pkg_info$dvr_url, nchar(pkg_info$dvr_url)-3,nchar(pkg_info$dvr_url)) == '.zip'){ unzip(pkg_info$dvr_url, files = paste0('drivers_GLM_', driver_name, '/driver_index.tsv'), exdir=dirname(dest), junkpaths=TRUE) + }else if(substr(index_url, 1,7) == 'file://'){ + + dest = index_url + }else{ if(!download_helper(index_url, dest)){ stop('driver_index.tsv: unable to download for driver data:', driver_name) diff --git a/R/sb_archive_model.R b/R/sb_archive_model.R new file mode 100644 index 0000000..04d2b36 --- /dev/null +++ b/R/sb_archive_model.R @@ -0,0 +1,54 @@ +#' @title Upload and archive model to ScienceBase +#' +#' @description +#' Creates relevant child items and uploads the model output files to ScienceBase +#' +#' @param path Path to folder containing sim files +#' @param sb_root SB root item ID to create new items under +#' @param sbuser SB user name +#' @param sbpass SB password +#' +#' @import sbtools +#' +#' @export +sb_archive_model = function(path, sb_root, sbuser, sbpass){ + + sim = basename(path) + allfiles = dir(path) + ndone = 0 + pb = txtProgressBar(min = 0, max = length(allfiles)) + + authenticate_sb(sbuser, sbpass) + itm_title = paste0('Simulated lake temp metrics for ', sim) + sim_itm = item_create(parent_id=sb_root, title=itm_title) + + ## core metrics create/upload + core_met = item_create(parent_id=sim_itm, title=paste0(sim, ':Core thermal metrics')) + files = Sys.glob(file.path(path, '*_core_metrics.tsv')) + item_append_files(core_met, files) + setTxtProgressBar(pb, (ndone <- ndone+1)) + + ## fish habitat + fish_hab = item_create(parent_id=sim_itm, title=paste0(sim, ':Fish habitat metrics')) + files = Sys.glob(file.path(path, '*_fish_hab.tsv')) + item_append_files(fish_hab, files) + setTxtProgressBar(pb, (ndone <- ndone+1)) + + ## model configuration + mod_conf = item_create(parent_id=sim_itm, title=paste0(sim, ':Model configuration input')) + files =
Sys.glob(file.path(path, '*_model_config.json')) + item_append_files(mod_conf, files) + setTxtProgressBar(pb, (ndone <- ndone+1)) + + + cat('starting wtr files upload...') + ## water temp files + wtr_raw = item_create(parent_id=sim_itm, title=paste0(sim, ':Raw water temperature')) + files = Sys.glob(file.path(path, '*_wtemp_*')) + for(i in 1:length(files)){ + item_append_files(wtr_raw, files=files[i]) + setTxtProgressBar(pb, (ndone <- ndone+1)) + } + + close(pb) +} diff --git a/man/sb_archive_model.Rd b/man/sb_archive_model.Rd new file mode 100644 index 0000000..e03c56a --- /dev/null +++ b/man/sb_archive_model.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sb_archive_model.R +\name{sb_archive_model} +\alias{sb_archive_model} +\title{Upload and archive model to ScienceBase} +\usage{ +sb_archive_model(path, sb_root, sbuser, sbpass) +} +\arguments{ +\item{path}{Path to folder containing sim files} + +\item{sb_root}{SB root item ID to create new items under} + +\item{sbuser}{SB user name} + +\item{sbpass}{SB password} +} +\description{ +Creates relevant child items and uploads the model output files to ScienceBase +} + From df601e0eb6d5f89e251c9c057d874526acea9427 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 10:22:33 -0500 Subject: [PATCH 39/42] combine error files on finish as well --- DESCRIPTION | 2 +- R/combine_output_data.R | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ab134c5..7adcf5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.21 +Version: 4.2.22 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/R/combine_output_data.R b/R/combine_output_data.R index 2e3059e..f956076 100644 --- a/R/combine_output_data.R +++ b/R/combine_output_data.R @@ -23,6 +23,7 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){ cfg_path = paste0(path, sim, '/', sim, '_model_config.json') hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv') cal_path = paste0(path, sim, '/', sim, '_calibration_data.tsv') + error_path = paste0(path, sim, '/', sim, '_error_output.tsv') ################################################################################ ## read and handle core metrics @@ -71,6 +72,22 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){ cat('Skipping nml config.\n') } + ################################################################################ + ### read and handle error files + bad_files = Sys.glob(paste0(path, sim, '/*/bad_data.Rdata*')) + + bad_data = list() + + for(i in 1:length(bad_files)){ + tmp = new.env() + load(bad_files[i], envir = tmp) + + bad_data = c(bad_data, tmp$bad_data) + } + + save(bad_data, file=error_path) + + ################################################################################ ###read and handle raw water temp data.
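# The error-file pass above loads each bad_data.Rdata into a fresh environment
# so successive load() calls never clobber local state. A self-contained
# sketch of that idiom (collect_rdata() is an illustrative name, not an
# mda.lakes function):
collect_rdata = function(files, name = 'bad_data'){
  acc = list()
  for(f in files){        # a zero-length files vector skips the loop cleanly
    e = new.env()
    load(f, envir = e)    # populates e[[name]] from the .Rdata file
    acc = c(acc, get(name, envir = e))
  }
  acc
}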
wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*')) From a4d75440edf91015bd5c286c82e8bb24198d11dd Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 11:16:14 -0500 Subject: [PATCH 40/42] change organization of running code --- DESCRIPTION | 2 +- R/run_necsc_lake.R | 240 ++++++++++++++++++++++++++ R/slurm_array_cleanup_missing_lakes.R | 31 ++++ demo/slurm_array_hab_out_god_mode.R | 225 +----------------------- 4 files changed, 276 insertions(+), 222 deletions(-) create mode 100644 R/run_necsc_lake.R create mode 100644 R/slurm_array_cleanup_missing_lakes.R diff --git a/DESCRIPTION b/DESCRIPTION index 7adcf5c..30c7178 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.22 +Version: 4.3.0 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/R/run_necsc_lake.R b/R/run_necsc_lake.R new file mode 100644 index 0000000..810cbc2 --- /dev/null +++ b/R/run_necsc_lake.R @@ -0,0 +1,240 @@ + + + +#' @title Large wrapper function NECSC mod run +#' +#' @description +#' Runs a single NECSC lake given the default configuration for +#' both NLDAS and Notaro drivers +#' +#' @export +run_necsc_lake = function(site_id = NA, driver_name, out_dir){ + + if(is.na(site_id)){ + stop("ERROR site_id cannot be NA") + } + + library(lakeattributes) + library(mda.lakes) + library(dplyr) + library(glmtools) + source(system.file('demo/common_running_functions.R', package='mda.lakes')) + + Sys.setenv(TZ='GMT') + + + future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){ + + library(lakeattributes) + library(mda.lakes) + library(dplyr) + library(glmtools) + library(lubridate) + + fastdir = tempdir() + #for use on WiWSC Condor pool + if(file.exists('/mnt/ramdisk')){ + fastdir = '/mnt/ramdisk' + } + #for use on YETI + if(Sys.getenv('RAM_SCRATCH', unset = '') != ''){ + fastdir = Sys.getenv('RAM_SCRATCH', unset = '') + } + + + tryCatch({ + + run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + dir.create(run_dir) + + #rename for dplyr + nhd_id = site_id + + #prep observations for calibration data + data(wtemp) + obs = filter(wtemp, site_id == nhd_id) %>% + transmute(DateTime=date, Depth=depth, temp=wtemp) %>% + filter(year(DateTime) %in% modern_era) + + have_cal = nrow(obs) > 0 + + if(have_cal){ + #having a weird issue with resample_to_field, make unique + obs = obs[!duplicated(obs[,1:2]), ] + + write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE) + } + + + #get driver data + driver_path = driver_function(site_id) + driver_path = gsub('\\\\', '/', driver_path) + + + kd_avg = secchi_function(site_id) #secchi_conv/mean(kds$secchi_avg, na.rm=TRUE) + + #run with different driver and ice sources + + prep_run_glm_kd(site_id=site_id, + path=run_dir, + years=modern_era, + kd=kd_avg, + nml_args=c(list( + dt=3600, subdaily=FALSE, nsave=24, + timezone=-6, + csv_point_nlevs=0, + snow_albedo_factor=0.85, + meteo_fl=driver_path, + cd=getCD(site_id, method='Hondzo')), + nml_args)) + + + ##parse the habitat and WTR info. 
next run will clobber output.nc + wtr_all = get_temp(file.path(run_dir, 'output.nc'), reference='surface') + ## drop the first n burn-in years + #years = as.POSIXlt(wtr$DateTime)$year + 1900 + #to_keep = !(years <= min(years) + nburn - 1) + #wtr_all = wtr[to_keep, ] + + core_metrics = necsc_thermal_metrics_core(run_dir, site_id) + + hansen_habitat = hansen_habitat_calc(run_dir, site_id) + + notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc')) + + nml = read_nml(file.path(run_dir, "glm2.nml")) + + if(have_cal){ + cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv')) + cal_data$site_id = site_id + cat('Calibration data calculated\n') + }else{ + cal_data = data.frame() #just use empty data frame if no cal data + cat('No Cal, calibration skipped\n') + } + + unlink(run_dir, recursive=TRUE) + + notaro_metrics$site_id = site_id + + all_data = list(wtr=wtr_all, core_metrics=core_metrics, + hansen_habitat=hansen_habitat, + site_id=site_id, + notaro_metrics=notaro_metrics, + nml=nml, + cal_data=cal_data) + + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + + return(all_data) + + }, error=function(e){ + unlink(run_dir, recursive=TRUE) + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(list(error=e, site_id)) + }) + } + + + + getnext = function(fname){ + i=0 + barefname = fname + while(file.exists(fname)){ + i=i+1 + fname = paste0(barefname, '.', i) + } + return(fname) + } + + wrapup_output = function(out, out_dir, years){ + + run_exists = file.exists(out_dir) + + if(!run_exists) {dir.create(out_dir, recursive=TRUE)} + + good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + + sprintf('%i lakes ran\n', length(good_data)) + dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) + #drop the burn-in years + dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) + + hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) + hansen_habitat = subset(hansen_habitat, year %in% years) + + core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) + core_metrics = subset(core_metrics, year %in% years) + + notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) + + cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) + + model_config = lapply(good_data, function(x){x$nml}) + + notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) + write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) + write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + + + save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) + save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) + save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + + rm(out,
good_data, dframes) + gc() + } + + + ################################################################################ + ## Lets run Downscaled climate runs 1980-1999, 2020-2039, 2080:2099 + ################################################################################ + gcm_driver_fun = function(site_id, dname){ + drivers = read.csv(get_driver_path(paste0(site_id, ''), driver_name = dname, timestep = 'daily'), header=TRUE) + #nldas = read.csv(get_driver_path(paste0(site_id, ''), driver_name = 'NLDAS'), header=TRUE) + #drivers = driver_nldas_debias_airt_sw(drivers, nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + driver_save(drivers) + } + + nldas_driver_fun = function(site_id, dname){ + nldas = read.csv(get_driver_path(site_id, driver_name = dname), header=TRUE) + drivers = driver_nldas_wind_debias(nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + #fix the 2-day offset in NLDAS data + drivers$time = drivers$time + as.difftime(-2, units='days') + driver_save(drivers) + } + + + + if(driver_name == 'NLDAS'){ + driver_fun = nldas_driver_fun + yeargroups = list(1979:2015) + }else{ + driver_fun = gcm_driver_fun + yeargroups = list(1981:2000, 2040:2059, 2080:2099) + } + + + for(ygroup in yeargroups){ + start = Sys.time() + out = lapply(site_id, future_hab_wtr, + modern_era=ygroup, + secchi_function=secchi_standard, + driver_function=function(site_id){driver_fun(site_id, driver_name)}) + + wrapup_output(out, file.path(out_dir, site_id), years=ygroup) + + print(difftime(Sys.time(), start, units='hours')) + cat('on to the next\n') + } + + +} \ No newline at end of file diff --git a/R/slurm_array_cleanup_missing_lakes.R b/R/slurm_array_cleanup_missing_lakes.R new file mode 100644 index 0000000..cbb2a8a --- /dev/null +++ b/R/slurm_array_cleanup_missing_lakes.R @@ -0,0 +1,31 @@ +### Cleanup missing run lakes + +library(lakeattributes) +library(mda.lakes) + +################################################################################ +### read in config file config +config = read.table('config', header=TRUE, as.is=TRUE) + +driver_name = config$drivername +driver_url = config$driverurl +out_dir = file.path(config$outdir, driver_name) +set_driver_url(driver_url) + +rundirs = Sys.glob(file.path(out_dir, '*')) + +for(i in 1:length(rundirs)){ + #NLDAS will have at least 5 outfiles + if(length(dir(rundirs[i])) > 4){ + next + } + + site_id = basename(rundirs[i]) + cat('Running ', site_id, '...\n') + + run_necsc_lake(site_id, driver_name, out_dir) +} + + + + diff --git a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R index 297742a..02edd77 100644 --- a/demo/slurm_array_hab_out_god_mode.R +++ b/demo/slurm_array_hab_out_god_mode.R @@ -2,210 +2,11 @@ ## This uses SLURM array running to run individual lakes ## MPI has been too troublesome -#first load some modules: -# module load tools/netcdf-4.3.2-gnu - - library(lakeattributes) library(mda.lakes) -library(dplyr) -library(glmtools) -source(system.file('demo/common_running_functions.R', package='mda.lakes')) - -Sys.setenv(TZ='GMT') - -# clusterEvalQ(c1, Sys.setenv(TZ='GMT')) - - -future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){ - - library(lakeattributes) - library(mda.lakes) - library(dplyr) - library(glmtools) - library(lubridate) 
- - fastdir = tempdir() - #for use on WiWSC Condor pool - if(file.exists('/mnt/ramdisk')){ - fastdir = '/mnt/ramdisk' - } - #for use on YETI - if(Sys.getenv('RAM_SCRATCH', unset = '') != ''){ - fastdir = Sys.getenv('RAM_SCRATCH', unset = '') - } - - - tryCatch({ - - run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) - cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') - dir.create(run_dir) - - #rename for dplyr - nhd_id = site_id - - #prep observations for calibration data - data(wtemp) - obs = filter(wtemp, site_id == nhd_id) %>% - transmute(DateTime=date, Depth=depth, temp=wtemp) %>% - filter(year(DateTime) %in% modern_era) - - have_cal = nrow(obs) > 0 - - if(have_cal){ - #having a weird issue with resample_to_field, make unique - obs = obs[!duplicated(obs[,1:2]), ] - - write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE) - } - - - #get driver data - driver_path = driver_function(site_id) - driver_path = gsub('\\\\', '/', driver_path) - - - kd_avg = secchi_function(site_id) #secchi_conv/mean(kds$secchi_avg, na.rm=TRUE) - - #run with different driver and ice sources - - prep_run_glm_kd(site_id=site_id, - path=run_dir, - years=modern_era, - kd=kd_avg, - nml_args=c(list( - dt=3600, subdaily=FALSE, nsave=24, - timezone=-6, - csv_point_nlevs=0, - snow_albedo_factor=0.85, - meteo_fl=driver_path, - cd=getCD(site_id, method='Hondzo')), - nml_args)) - - - ##parse the habitat and WTR info. next run will clobber output.nc - wtr_all = get_temp(file.path(run_dir, 'output.nc'), reference='surface') - ## drop the first n burn-in years - #years = as.POSIXlt(wtr$DateTime)$year + 1900 - #to_keep = !(years <= min(years) + nburn - 1) - #wtr_all = wtr[to_keep, ] - - core_metrics = necsc_thermal_metrics_core(run_dir, site_id) - - hansen_habitat = hansen_habitat_calc(run_dir, site_id) - - notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc')) - - nml = read_nml(file.path(run_dir, "glm2.nml")) - - if(have_cal){ - cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv')) - cal_data$site_id = site_id - cat('Calibration data calculated\n') - }else{ - cal_data = data.frame() #just use empy data frame if no cal data - cat('No Cal, calibration skipped\n') - } - - unlink(run_dir, recursive=TRUE) - - notaro_metrics$site_id = site_id - - all_data = list(wtr=wtr_all, core_metrics=core_metrics, - hansen_habitat=hansen_habitat, - site_id=site_id, - notaro_metrics=notaro_metrics, - nml=nml, - cal_data=cal_data) - - cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') - - return(all_data) - - }, error=function(e){ - unlink(run_dir, recursive=TRUE) - cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') - return(list(error=e, site_id)) - }) -} - - - -getnext = function(fname){ - i=0 - barefname = fname - while(file.exists(fname)){ - i=i+1 - fname = paste0(barefname, '.', i) - } - return(fname) -} - -wrapup_output = function(out, out_dir, years){ - - run_exists = file.exists(out_dir) - - if(!run_exists) {dir.create(out_dir, recursive=TRUE)} - - good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] - bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] - - sprintf('%i lakes ran\n', length(good_data)) - dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) - #drop the burn-in years - dframes = lapply(dframes, 
function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) - - hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) - hansen_habitat = subset(hansen_habitat, year %in% years) - - core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) - core_metrics = subset(core_metrics, year %in% years) - - notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) - - cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) - - model_config = lapply(good_data, function(x){x$nml}) - - notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) - write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) - write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - - - save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) - save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) - save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) - - rm(out, good_data, dframes) - gc() -} - ################################################################################ -## Lets run Downscaled climate runs 1980-1999, 2020-2039, 2080:2099 -################################################################################ -gcm_driver_fun = function(site_id, dname){ - drivers = read.csv(get_driver_path(paste0(site_id, ''), driver_name = dname, timestep = 'daily'), header=TRUE) - #nldas = read.csv(get_driver_path(paste0(site_id, ''), driver_name = 'NLDAS'), header=TRUE) - #drivers = driver_nldas_debias_airt_sw(drivers, nldas) - drivers = driver_add_burnin_years(drivers, nyears=2) - drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off - driver_save(drivers) -} - -nldas_driver_fun = function(site_id, dname){ - nldas = read.csv(get_driver_path(site_id, driver_name = dname), header=TRUE) - drivers = driver_nldas_wind_debias(nldas) - drivers = driver_add_burnin_years(drivers, nyears=2) - drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off - #fix the 2-day offset in NLDAS data - drivers$time = drivers$time + as.difftime(-2, units='days') - driver_save(drivers) -} - +### read in env var config #This should be 1 to n, where n may be larger than total number of lakes to model task_id = as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID', 'NA')) @@ -219,6 +20,8 @@ if(is.na(task_offset)){ } task_id = task_id + task_offset +################################################################################ +### read in config file config config = read.table('config', header=TRUE, as.is=TRUE) driver_name = config$drivername @@ -226,14 +29,6 @@ driver_url = config$driverurl out_dir = file.path(config$outdir, driver_name) set_driver_url(driver_url) -if(driver_name == 'NLDAS'){ - driver_fun = nldas_driver_fun - yeargroups = list(1979:2015) -}else{ - driver_fun = gcm_driver_fun - yeargroups = list(1981:2000, 2040:2059, 2080:2099) -} - to_run = as.character(unique(zmax$site_id)) if(task_id > length(to_run)){ sprintf('Skipping task_id:%i because 
greater than number of lakes to run') @@ -243,17 +38,5 @@ if(task_id > length(to_run)){ site_id = to_run[task_id] -for(ygroup in yeargroups){ - start = Sys.time() - out = lapply(site_id, future_hab_wtr, - modern_era=ygroup, - secchi_function=secchi_standard, - driver_function=function(site_id){driver_fun(site_id, driver_name)}) - - wrapup_output(out, file.path(out_dir, site_id), years=ygroup) - - print(difftime(Sys.time(), start, units='hours')) - cat('on to the next\n') -} - +run_necsc_lake(site_id, driver_name, out_dir) From 31566fa066e87b3c5d16c89d363263fb325ea8d6 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 11:24:02 -0500 Subject: [PATCH 41/42] build for documentation and namespace and move file --- NAMESPACE | 1 + R/run_necsc_lake.R | 1 - demo/mpi_output_data.R | 2 +- {R => demo}/slurm_array_cleanup_missing_lakes.R | 0 man/run_necsc_lake.Rd | 13 +++++++++++++ 5 files changed, 15 insertions(+), 2 deletions(-) rename {R => demo}/slurm_array_cleanup_missing_lakes.R (100%) create mode 100644 man/run_necsc_lake.Rd diff --git a/NAMESPACE b/NAMESPACE index b7f3777..ab8fca8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(necsc_thermal_metrics_core) export(opti_thermal_habitat) export(populate_base_lake_nml) export(prep_run_glm_kd) +export(run_necsc_lake) export(sb_archive_model) export(sens_seasonal_site) export(set_driver_url) diff --git a/R/run_necsc_lake.R b/R/run_necsc_lake.R index 810cbc2..a0a719e 100644 --- a/R/run_necsc_lake.R +++ b/R/run_necsc_lake.R @@ -236,5 +236,4 @@ run_necsc_lake = function(site_id = NA, driver_name, out_dir){ cat('on to the next\n') } - } \ No newline at end of file diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R index 6dbb6b3..737e1a7 100644 --- a/demo/mpi_output_data.R +++ b/demo/mpi_output_data.R @@ -1,5 +1,5 @@ -library(mda.lakes) +library(mda.lakes) config = read.table('config', header=TRUE, as.is=TRUE) diff --git a/R/slurm_array_cleanup_missing_lakes.R b/demo/slurm_array_cleanup_missing_lakes.R similarity index 100% rename from R/slurm_array_cleanup_missing_lakes.R rename to demo/slurm_array_cleanup_missing_lakes.R diff --git a/man/run_necsc_lake.Rd b/man/run_necsc_lake.Rd new file mode 100644 index 0000000..c86d954 --- /dev/null +++ b/man/run_necsc_lake.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_necsc_lake.R +\name{run_necsc_lake} +\alias{run_necsc_lake} +\title{Large wrapper function NECSC mod run} +\usage{ +run_necsc_lake(site_id = NA, driver_name, out_dir) +} +\description{ +Runs a single NECSC lake given the default configuration for +both NLDAS and Notaro drivers +} + From e22a35dd13a5226f0bf3f9f40eca01bd533ad240 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 15:11:00 -0500 Subject: [PATCH 42/42] better error handling on results output --- R/run_necsc_lake.R | 57 +++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/R/run_necsc_lake.R b/R/run_necsc_lake.R index a0a719e..2e2c20d 100644 --- a/R/run_necsc_lake.R +++ b/R/run_necsc_lake.R @@ -156,37 +156,38 @@ run_necsc_lake = function(site_id = NA, driver_name, out_dir){ good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] - - sprintf('%i lakes ran\n', length(good_data)) - dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) - #drop the burn-in years - dframes 
= lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) - - hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) - hansen_habitat = subset(hansen_habitat, year %in% years) - - core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) - core_metrics = subset(core_metrics, year %in% years) - - notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) - - cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) - - model_config = lapply(good_data, function(x){x$nml}) - - notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) - write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) - write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) - save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) - save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) - save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + cat(sprintf('%i lakes ran\n', length(good_data))) + if(length(good_data) > 0){ + dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) + #drop the burn-in years + dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) + + hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) + hansen_habitat = subset(hansen_habitat, year %in% years) + + core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) + core_metrics = subset(core_metrics, year %in% years) + + notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) + + cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) + + model_config = lapply(good_data, function(x){x$nml}) + + notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) + write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) + write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + + + save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) + save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + } - rm(out, good_data, dframes) - gc() }
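# Across these patches, wrapup_output() relies on one recurring idiom: append
# result rows to a growing TSV across repeated calls, writing the header only
# when the file is first created. A self-contained sketch (append_tsv() is an
# illustrative helper, not exported by mda.lakes):
append_tsv = function(df, path){
  new_file = !file.exists(path)
  write.table(df, path, sep = '\t', row.names = FALSE,
              append = !new_file, col.names = new_file)
}
# usage, e.g.: append_tsv(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'))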