From 62da805ecc88b85e6f10c12fbbb115f0a00497ef Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 22 Jul 2016 09:40:08 -0500
Subject: [PATCH 01/42] separate notaro sim output by year range

---
 demo/mpi_hab_out.R | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index bc5db51..41f6c57 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -110,19 +110,19 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
     hansen_habitat = hansen_habitat_calc(run_dir, site_id)
 
-    #notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc'))
+    notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc'))
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
     unlink(run_dir, recursive=TRUE)
 
-    #notaro_metrics$site_id = site_id
+    notaro_metrics$site_id = site_id
 
     all_data = list(wtr=wtr_all,
                     core_metrics=core_metrics,
                     hansen_habitat=hansen_habitat,
                     site_id=site_id,
-                    #notaro_metrics=notaro_metrics,
+                    notaro_metrics=notaro_metrics,
                     nml=nml)
 
     return(all_data)
@@ -152,8 +152,8 @@ getnext = function(fname){
     return(fname)
 }
 
-wrapup_output = function(out, run_name, years){
-    out_dir = file.path('.', run_name)
+wrapup_output = function(out, out_dir, years){
+    #out_dir = file.path('.', run_name)
 
     run_exists = file.exists(out_dir)
 
@@ -173,13 +173,15 @@ wrapup_output = function(out, run_name, years){
     core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
     core_metrics = subset(core_metrics, year %in% years)
 
-    #notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+    notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
 
     model_config = lapply(good_data, function(x){x$nml})
 
+    notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
+    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-    #write.table(notaro_metrics, file.path(out_dir, 'notaro_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+
 
     save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
     save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
@@ -205,8 +207,9 @@ driver_fun = function(site_id, gcm){
 config = read.table('config', header=TRUE, as.is=TRUE)
 
 driver_name = config$drivername
-
 driver_url = config$driverurl
+out_dir = config$outdir
+
 to_run = as.character(unique(zmax$site_id))
 to_run = split(to_run, cut(seq_along(to_run), mpisize, labels = FALSE))[[mpirank+1]]
@@ -232,7 +235,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, run_name, years=ygroup)
+    wrapup_output(out, file.path(out_dir, run_name), years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
    cat('on to the next\n')
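A minimal sketch of how the year-range file naming introduced in patch 01 resolves (directory and years are illustrative):

    years = 1980:1999
    notaro_file = file.path('out', paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
    ## notaro_file == "out/notaro_metrics_1980_1999.tsv"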
From 15390d54a03bcc6dd9fc42ada4600727f192a533 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 22 Jul 2016 09:41:01 -0500
Subject: [PATCH 02/42] version increment

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5f358ac..ef4368b 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.1
+Version: 4.2.2
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

From 15f6c285182c51372ae0d8d7cdf8218deca974e0 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 22 Jul 2016 10:06:49 -0500
Subject: [PATCH 03/42] need driver name in there too

---
 demo/mpi_hab_out.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 41f6c57..7be7850 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -208,7 +208,7 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 
 driver_name = config$drivername
 driver_url = config$driverurl
-out_dir = config$outdir
+out_dir = file.path(config$outdir, driver_name)
 
 to_run = as.character(unique(zmax$site_id))

From c51b63ec287659a336b2a385ef6e2c780bdacd59 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 12:37:47 -0500
Subject: [PATCH 04/42] debugging some YETI issues

---
 DESCRIPTION        | 2 +-
 demo/mpi_hab_out.R | 9 +--------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index ef4368b..886453f 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.2
+Version: 4.2.3
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 7be7850..2f972ae 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -134,13 +134,6 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
 }
 
-driver_fun = function(site_id){
-    nldas = read.csv(get_driver_path(site_id, driver_name = 'NLDAS'), header=TRUE)
-    drivers = driver_nldas_wind_debias(nldas)
-    drivers = driver_add_burnin_years(drivers, nyears=2)
-    drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off
-    driver_save(drivers)
-}
 
 getnext = function(fname){
     i=0
@@ -224,7 +217,7 @@ set_driver_url(driver_url)
 run_name = paste0(mpirank)
 
 ##1980-1999
-runsplits = split(1:length(to_run), floor(1:length(to_run)/1e3))
+runsplits = split(1:length(to_run), floor(1:length(to_run)/10))
 
 yeargroups = list(1980:1999, 2020:2039, 2080:2099)
 for(ygroup in yeargroups){
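Patch 04 shrinks the per-iteration work unit from 1000 lakes to 10 for debugging. A quick sketch of how that grouping behaves (toy site list; note the first group holds only 9 elements because floor(1/10) through floor(9/10) are all 0):

    to_run = paste0('lake_', 1:25)
    runsplits = split(1:length(to_run), floor(1:length(to_run)/10))
    lengths(runsplits)
    ##  0  1  2
    ##  9 10  6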
From 0f1304c9f16884dbc8a8f97f61258e0fda7bab22 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 12:42:44 -0500
Subject: [PATCH 05/42] more specific details back

---
 DESCRIPTION        |  2 +-
 demo/mpi_hab_out.R | 69 ++++++++++++++++++++++++----------------------
 2 files changed, 37 insertions(+), 34 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 886453f..fe50c14 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.3
+Version: 4.2.4
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 2f972ae..7885739 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -147,40 +147,43 @@ getnext = function(fname){
 
 wrapup_output = function(out, out_dir, years){
     #out_dir = file.path('.', run_name)
+    tryCatch({
+        run_exists = file.exists(out_dir)
+
+        if(!run_exists) {dir.create(out_dir, recursive=TRUE)}
+
+        good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+        bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+
+        sprintf('%i lakes ran\n', length(good_data))
+        dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)})
+        #drop the burn-in years
+        dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))})
+
+        hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]}))
+        hansen_habitat = subset(hansen_habitat, year %in% years)
+
+        core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
+        core_metrics = subset(core_metrics, year %in% years)
+
+        notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+
+        model_config = lapply(good_data, function(x){x$nml})
+
+        notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
+        write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+        write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+        write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+
+
+        save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
+        save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
+        save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata')))
+
+        rm(out, good_data, dframes)
+
+    }, error=function(d){traceback()})
 
     gc()
 }
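The good/bad partition inside wrapup_output() keys off the shape of each worker's return value: failures come back as a list carrying an 'error' element, or as NULL. A self-contained sketch of the same filter, with hypothetical results standing in for model output:

    out = list(list(wtr='...', site_id='a'),
               list(error=simpleError('GLM died'), 'b'),
               NULL)
    is_bad = unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))
    good_data = out[!is_bad]   # first element only
    bad_data  = out[is_bad]    # the error result and the NULL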
From e2b25c1edc187af3bec0e6b9629e81c2fea36b16 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 13:33:34 -0500
Subject: [PATCH 06/42] try without using projects output

---
 DESCRIPTION        |  2 +-
 demo/mpi_hab_out.R | 75 ++++++++++++++++++++++------------------------
 2 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fe50c14..acb4049 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.4
+Version: 4.2.5
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 7885739..e1386fb 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -145,45 +145,42 @@ getnext = function(fname){
     return(fname)
 }
 
-wrapup_output = function(out, out_dir, years){
-    #out_dir = file.path('.', run_name)
-    tryCatch({
-        run_exists = file.exists(out_dir)
-
-        if(!run_exists) {dir.create(out_dir, recursive=TRUE)}
-
-        good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
-        bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
-
-        sprintf('%i lakes ran\n', length(good_data))
-        dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)})
-        #drop the burn-in years
-        dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))})
-
-        hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]}))
-        hansen_habitat = subset(hansen_habitat, year %in% years)
-
-        core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
-        core_metrics = subset(core_metrics, year %in% years)
-
-        notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
-
-        model_config = lapply(good_data, function(x){x$nml})
-
-        notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
-        write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-        write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-        write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-
-
-        save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
-        save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
-        save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata')))
-
-        rm(out, good_data, dframes)
-
-    }, error=function(d){traceback()})
+wrapup_output = function(out, run_name, years){
+    out_dir = file.path('.', run_name)
+
+    run_exists = file.exists(out_dir)
+
+    if(!run_exists) {dir.create(out_dir, recursive=TRUE)}
+
+    good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+    bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))]
+
+    sprintf('%i lakes ran\n', length(good_data))
+    dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)})
+    #drop the burn-in years
+    dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))})
+
+    hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]}))
+    hansen_habitat = subset(hansen_habitat, year %in% years)
+
+    core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
+    core_metrics = subset(core_metrics, year %in% years)
+
+    notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+
+    model_config = lapply(good_data, function(x){x$nml})
+
+    notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
+    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+
+
+    save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
+    save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
+    save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata')))
 
+    rm(out, good_data, dframes)
     gc()
 }
@@ -231,7 +228,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, file.path(out_dir, run_name), years=ygroup)
+    wrapup_output(out, run_name, years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
     cat('on to the next\n')
From ee2452ee56925a15201bd05ae8fc712d83f0f968 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 25 Jul 2016 15:31:04 -0500
Subject: [PATCH 07/42] some issue with gather when running on linux, trying
 to fix

---
 R/summarize_var_notaro.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/summarize_var_notaro.R b/R/summarize_var_notaro.R
index fff35e6..5a41135 100644
--- a/R/summarize_var_notaro.R
+++ b/R/summarize_var_notaro.R
@@ -47,7 +47,7 @@ summarize_var_notaro <- function(nc.file, var.name){
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select(-DateTime, -tz, -base.date) %>%
         select(doy, everything()) %>% group_by(doy) %>% summarize_each(c('mean','sd')) %>%
-        setNames(c('doy',rename_depths(names(.)[-1L]))) %>% gather(key = doy) %>%
+        setNames(c('doy',rename_depths(names(.)[-1L]))) %>% ##gather(key = doy) %>%
         setNames(c('doy','depth_stat','value')) %>%
         mutate(depth=get_depth(depth_stat), statistic=get_stat(depth_stat), variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%
@@ -56,7 +56,7 @@ summarize_var_notaro <- function(nc.file, var.name){
     var <- get_var(nc.file, var.name)%>%
         mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select_('doy', var.name) %>%
         group_by(doy) %>%
-        summarize_each(c('mean','sd')) %>% gather(key = doy) %>%
+        summarize_each(c('mean','sd')) %>% ##gather(key = doy) %>%
         setNames(c('doy','statistic','value')) %>%
         mutate(depth=NA, variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%

From b020d26210b59c5f160b348ce0ab1cb77bcd4a12 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 09:58:24 -0500
Subject: [PATCH 08/42] fixing freaking tibble caused issue on *nix

---
 DESCRIPTION              | 2 +-
 R/summarize_var_notaro.R | 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index acb4049..7829ce0 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.5
+Version: 4.2.6
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/summarize_var_notaro.R b/R/summarize_var_notaro.R
index 5a41135..9575a03 100644
--- a/R/summarize_var_notaro.R
+++ b/R/summarize_var_notaro.R
@@ -47,7 +47,8 @@ summarize_var_notaro <- function(nc.file, var.name){
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select(-DateTime, -tz, -base.date) %>%
         select(doy, everything()) %>% group_by(doy) %>% summarize_each(c('mean','sd')) %>%
-        setNames(c('doy',rename_depths(names(.)[-1L]))) %>% ##gather(key = doy) %>%
+        setNames(c('doy',rename_depths(names(.)[-1L]))) %>%
+        as.data.frame %>% gather(key = doy) %>%
         setNames(c('doy','depth_stat','value')) %>%
         mutate(depth=get_depth(depth_stat), statistic=get_stat(depth_stat), variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%
@@ -56,7 +57,7 @@ summarize_var_notaro <- function(nc.file, var.name){
     var <- get_var(nc.file, var.name)%>%
         mutate(base.date=as.POSIXct(paste0(lubridate::year(DateTime),'-01-01')), tz='UTC') %>%
         mutate(doy=as.numeric(DateTime-base.date)/86400+1) %>% select_('doy', var.name) %>%
         group_by(doy) %>%
-        summarize_each(c('mean','sd')) %>% ##gather(key = doy) %>%
+        summarize_each(c('mean','sd')) %>% as.data.frame %>% gather(doy) %>%
         setNames(c('doy','statistic','value')) %>%
         mutate(depth=NA, variable=value.name) %>%
         select(doy, depth, statistic, value, variable) %>%
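The as.data.frame inserted in patch 08 is what makes gather() usable again: summarize_each() returns a grouped tbl_df, and tidyr's gather() on that tibble misbehaved on the *nix nodes, while a plain data.frame reshapes cleanly. A toy illustration of the idea (not the notaro pipeline itself):

    library(dplyr)
    library(tidyr)
    wide = data.frame(doy=1:2, wtr_0.5=c(4.1, 5.2), wtr_1.5=c(3.8, 4.6))
    long = wide %>% as.data.frame %>% gather(key=depth_stat, value=value, -doy)
    ## 4 rows: doy, depth_stat (wtr_0.5/wtr_1.5), value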
From 9aa0a4e4017a8e280e41463f591b0efceb18fc28 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 10:12:06 -0500
Subject: [PATCH 09/42] return to projects dir outputs

---
 DESCRIPTION        | 2 +-
 demo/mpi_hab_out.R | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 7829ce0..5aebf92 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.6
+Version: 4.2.7
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index e1386fb..da6c9a3 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -145,8 +145,8 @@ getnext = function(fname){
     return(fname)
 }
 
-wrapup_output = function(out, run_name, years){
-    out_dir = file.path('.', run_name)
+wrapup_output = function(out, out_dir, years){
+    #out_dir = file.path('.', run_name)
 
     run_exists = file.exists(out_dir)
 
@@ -217,7 +217,7 @@ set_driver_url(driver_url)
 run_name = paste0(mpirank)
 
 ##1980-1999
-runsplits = split(1:length(to_run), floor(1:length(to_run)/10))
+runsplits = split(1:length(to_run), floor(1:length(to_run)/1e3))
 
 yeargroups = list(1980:1999, 2020:2039, 2080:2099)
 for(ygroup in yeargroups){
@@ -228,7 +228,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, run_name, years=ygroup)
+    wrapup_output(out, system.file(out_dir, run_name), years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
     cat('on to the next\n')

From d9aafe3675dedba89bac35090c6411fcbeefffeb Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 10:18:22 -0500
Subject: [PATCH 10/42] 100, not 1000

---
 demo/mpi_hab_out.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index da6c9a3..b52aa9a 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -217,7 +217,7 @@ set_driver_url(driver_url)
 run_name = paste0(mpirank)
 
 ##1980-1999
-runsplits = split(1:length(to_run), floor(1:length(to_run)/1e3))
+runsplits = split(1:length(to_run), floor(1:length(to_run)/1e2))
 
 yeargroups = list(1980:1999, 2020:2039, 2080:2099)
 for(ygroup in yeargroups){
From 5ae34f00a4825aaca53f96a05e8d12d988d581ae Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 17:35:42 -0500
Subject: [PATCH 11/42] fix file path typo

---
 demo/mpi_hab_out.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index b52aa9a..3b47dc7 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -171,7 +171,7 @@ wrapup_output = function(out, out_dir, years){
     model_config = lapply(good_data, function(x){x$nml})
 
     notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
-    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file))
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
@@ -228,7 +228,7 @@ for(ygroup in yeargroups){
                         secchi_function=secchi_standard,
                         driver_function=function(site_id){driver_fun(site_id, driver_name)})
 
-    wrapup_output(out, system.file(out_dir, run_name), years=ygroup)
+    wrapup_output(out, file.path(out_dir, run_name), years=ygroup)
 
     print(difftime(Sys.time(), start, units='hours'))
     cat('on to the next\n')

From 3718d4440acddc660e888d4627d7d476db067c62 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Tue, 26 Jul 2016 17:36:13 -0500
Subject: [PATCH 12/42] version increment

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5aebf92..d30a7d9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.7
+Version: 4.2.8
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow
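Patch 11's actual fix: because the notaro file is named per year range, its append/header logic has to follow that file's own existence rather than the run directory's. The general pattern, pulled out as a standalone helper (write_appending is a hypothetical name, not in the package):

    write_appending = function(df, path){
        write.table(df, path, sep='\t', row.names=FALSE,
                    append=file.exists(path), col.names=!file.exists(path))
    }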
From 77a5891118cfa791e4f10b696639420d552c1a64 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 5 Aug 2016 15:52:07 -0500
Subject: [PATCH 13/42] add cal data output to model runs

---
 demo/mpi_hab_out.R | 24 ++++++++++++++++++------
 1 file changed, 18 insertions(+), 6 deletions(-)

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index b52aa9a..c7b77f8 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -74,6 +74,17 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
     #rename for dplyr
     nhd_id = site_id
 
+    #prep observations for calibration data
+    data(wtemp)
+    obs = filter(wtemp, site_id == nhd_id) %>%
+        transmute(DateTime=date, Depth=depth, temp=wtemp)
+
+    #having a weird issue with resample_to_field, make unique
+    obs = obs[!duplicated(obs[,1:2]), ]
+
+    write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+
+
     #get driver data
     driver_path = driver_function(site_id)
     driver_path = gsub('\\\\', '/', driver_path)
@@ -114,6 +125,8 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
+    cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
+    cal_data$site_id = site_id
 
     unlink(run_dir, recursive=TRUE)
@@ -123,7 +136,8 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
                 hansen_habitat=hansen_habitat,
                 site_id=site_id,
                 notaro_metrics=notaro_metrics,
-                nml=nml)
+                nml=nml,
+                cal_data=cal_data)
 
     return(all_data)
@@ -168,12 +182,15 @@ wrapup_output = function(out, out_dir, years){
 
     notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
 
+    cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]}))
+
     model_config = lapply(good_data, function(x){x$nml})
 
     notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv'))
     write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
 
     save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
@@ -207,11 +224,6 @@ out_dir = file.path(config$outdir, driver_name)
 
 to_run = as.character(unique(zmax$site_id))
 to_run = split(to_run, cut(seq_along(to_run), mpisize, labels = FALSE))[[mpirank+1]]
 
-#clusterExport(c1, 'driver_fun')
-#clusterExport(c1, 'secchi_standard')
-#clusterExport(c1, 'driver_name')
-#clusterExport(c1, 'driver_url')
-#clusterCall(c1, function(){library(mda.lakes);set_driver_url(driver_url)})
 set_driver_url(driver_url)
 
 run_name = paste0(mpirank)
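Patch 13's de-duplication step matters because, per the in-code comment, resample_to_field() misbehaves when the same DateTime/Depth pair shows up twice. The dedupe on a toy observation table:

    obs = data.frame(DateTime=as.POSIXct(c('2000-07-01', '2000-07-01', '2000-07-02')),
                     Depth=c(1, 1, 1),
                     temp=c(22.0, 22.4, 21.8))
    obs = obs[!duplicated(obs[, 1:2]), ]
    ## rows 1 and 3 survive; the repeated 2000-07-01 @ 1 m reading is dropped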
From e1f3e034ad81bc36d3c747d4ff6894ed71420a0b Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 5 Aug 2016 16:06:24 -0500
Subject: [PATCH 14/42] add ability to combine output data on cluster

---
 NAMESPACE                  |   1 +
 R/combine_output_data.R    | 104 +++++++++++++++++++++++++++++++++++++
 demo/combine_output_data.R |  77 ---------------------------
 demo/mpi_output_data.R     |  12 +++++
 man/combine_output_data.Rd |  13 +++++
 5 files changed, 130 insertions(+), 77 deletions(-)
 create mode 100644 R/combine_output_data.R
 delete mode 100644 demo/combine_output_data.R
 create mode 100644 demo/mpi_output_data.R
 create mode 100644 man/combine_output_data.Rd

diff --git a/NAMESPACE b/NAMESPACE
index edbbd3b..b279342 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -9,6 +9,7 @@ export(calc_stratified_periods)
 export(calc_toha)
 export(calc_toha_stats)
 export(comb_output_table)
+export(combine_output_data)
 export(create_irr_day_cycle)
 export(driver_add_burnin_years)
 export(driver_add_rain)

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
new file mode 100644
index 0000000..a112bc3
--- /dev/null
+++ b/R/combine_output_data.R
@@ -0,0 +1,104 @@
+#library(mda.lakes)
+#library(sbtools)
+#library(jsonlite)
+
+#' @title combine full sim run output data
+#'
+#' @description Combines all the individual compute node model files into
+#' a few files for the whole simulation
+#'
+#'
+#'
+#' @export
+combine_output_data = function(sim, path){
+    #fast temp location for bundling wtr out data
+    #this needs about 60GB of scratch space available
+    fast_tmp = tempdir()
+
+    core_path = paste0(path, sim, '/', sim, '_core_metrics.tsv')
+    cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
+    hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')
+
+    core_metrics = comb_output_table(paste0(path, sim, '/*/best_core_metrics.tsv'),
+                                     sep='\t', header=TRUE, as.is=TRUE)
+
+    write.table(core_metrics, core_path,
+                sep='\t', row.names=FALSE)
+
+    hab_metrics = comb_output_table(paste0(path, sim, '/*/best_hansen_hab.tsv'),
+                                    sep='\t', header=TRUE, as.is=TRUE)
+    write.table(hab_metrics, hansen_path,
+                sep='\t', row.names=FALSE)
+
+    nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
+
+    ###read and handle NML files
+    all_nml = list()
+    for(i in 1:length(nml_files)){
+        load(nml_files[i])
+        all_nml = c(all_nml, model_config)
+    }
+
+    all_nml = lapply(all_nml, function(x){class(x)='list'; x})
+    #save('all_nml', file = paste0(path, sim, '/model_config.Rdata'))
+    writeLines(toJSON(all_nml), cfg_path)
+
+    ###read and handle raw water temp data.
+    wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*'))
+
+    all_wtr_files = c()
+    wtemp_dir = file.path(fast_tmp, sim)
+    dir.create(wtemp_dir)
+
+    for(i in 1:length(wtr_files)){
+        load(wtr_files[i])
+
+        newfiles = lapply(dframes, function(df){
+            site_id = df$site_id[1]
+            df$site_id = NULL
+            wtemp_path = paste0(wtemp_dir, '/', sim, '_', site_id, '.tsv')
+
+            #the future sim periods were done separately, so they need to be appended
+            if(wtemp_path %in% all_wtr_files){
+                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
+            }else{
+                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
+            }
+
+            return(wtemp_path)
+        })
+
+        all_wtr_files = c(all_wtr_files, newfiles)
+    }
+
+    #split up files into 1000 lake groups
+    all_wtr_files = sort(unique(unlist(all_wtr_files)))
+    splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
+
+    wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip'))
+
+    #write an index file for later users
+    wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
+        data.frame(file_index=rep(basename(wtemp_zips)[i], length(splits[[i]])),
+                   file_name=basename(all_wtr_files)[splits[[i]]])
+    }))
+    wtemp_indx = file.path(path, sim, paste0(sim, '_wtemp_index.tsv'))
+    write.table(wtemp_zip_index, wtemp_indx, sep='\t', row.names=FALSE)
+
+
+    for(i in 1:length(splits)){
+        zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+    }
+    #delete raw text files to save space
+    unlink(all_wtr_files)
+
+    # #upload files to SB when done
+    # authenticate_sb(user, pass)
+    # itm_title = paste0('Simulated lake temperatures for ', sim, ' future projections')
+    # sim_itm = item_create(parent_id=sb_itm_root, title=itm_title)
+    #
+    # item_append_files(sim_itm, files=c(core_path, cfg_path, hansen_path, wtemp_zips, wtemp_indx))
+    print(c(core_path, cfg_path, hansen_path, wtemp_zips, wtemp_indx))
+    return(c(core_path, cfg_path, hansen_path, wtemp_zips, wtemp_indx))
+}
+

diff --git a/demo/combine_output_data.R b/demo/combine_output_data.R
deleted file mode 100644
index a3d6091..0000000
--- a/demo/combine_output_data.R
+++ /dev/null
@@ -1,77 +0,0 @@
-library(mda.lakes)
-library(sbtools)
-library(jsonlite)
-
-combine_output_data = function(sim, path, sb_itm_root, user, pass){
-
-    core_path = paste0(path, sim, '/best_core_metrics.tsv')
-    cfg_path = paste0(path, sim, "/model_config.json")
-    hansen_path = paste0(path, sim, '/best_hansen_hab.tsv')
-
-    core_metrics = comb_output_table(paste0(path, sim, '/*/best_core_metrics.tsv'),
-                                     sep='\t', header=TRUE, as.is=TRUE)
-
-    write.table(core_metrics, core_path,
-                sep='\t', row.names=FALSE)
-
-    hab_metrics = comb_output_table(paste0(path, sim, '/*/best_hansen_hab.tsv'),
-                                    sep='\t', header=TRUE, as.is=TRUE)
-    write.table(hab_metrics, hansen_path,
-                sep='\t', row.names=FALSE)
-
-    nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
-
-    ###read and handle NML files
-    all_nml = list()
-    for(i in 1:length(nml_files)){
-        load(nml_files[i])
-        all_nml = c(all_nml, model_config)
-    }
-
-    all_nml = lapply(all_nml, function(x){class(x)='list'; x})
-    #save('all_nml', file = paste0(path, sim, '/model_config.Rdata'))
-    writeLines(toJSON(all_nml), cfg_path)
-
-    ###read and handle raw water temp data.
-    # wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*'))
-    #
-    # all_wtr_files = c()
-    # wtemp_dir = file.path(path, sim, 'wtemp')
-    # dir.create(wtemp_dir)
-    #
-    # for(i in 1:length(wtr_files)){
-    #   load(wtr_files[i])
-    #
-    #   newfiles = lapply(dframes, function(df){
-    #       site_id = df$site_id[1]
-    #       df$site_id = NULL
-    #       wtemp_path = paste0(wtemp_dir, '/', site_id, '.tsv')
-    #
-    #       #the future sim periods were done separately, so they need to be appended
-    #       if(wtemp_path %in% all_wtr_files){
-    #           write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
-    #       }else{
-    #           write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
-    #       }
-    #
-    #       return(wtemp_path)
-    #   })
-    #
-    #   all_wtr_files = c(all_wtr_files, newfiles)
-    #   invisible(sbtools::query_sb_text('necsc', limit=1))
-    # }
-    #
-    # all_wtr_files = unique(unlist(all_wtr_files))
-    # wtemp_zip = file.path(path, sim, 'wtemp.zip')
-    # splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/100))
-    # for(i in 1:length(splits)){
-    #   zip(zipfile=wtemp_zip, files=all_wtr_files[splits[[i]]], flags='-j -r9X')
-    # }
-
-    authenticate_sb(user, pass)
-    itm_title = paste0('Simulated lake temperatures for ', sim, ' future projections')
-    sim_itm = item_create(parent_id=sb_itm_root, title=itm_title)
-
-    item_append_files(sim_itm, files=c(core_path, cfg_path, hansen_path))#, wtemp_zip))
-}
-

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
new file mode 100644
index 0000000..a435800
--- /dev/null
+++ b/demo/mpi_output_data.R
@@ -0,0 +1,12 @@
+library(mda.lakes)
+
+
+config = read.table('config', header=TRUE, as.is=TRUE)
+
+driver_name = config$drivername
+driver_url = config$driverurl
+out_dir = file.path(config$outdir, driver_name)
+
+
+combine_output_data(driver_name, out_dir)
+

diff --git a/man/combine_output_data.Rd b/man/combine_output_data.Rd
new file mode 100644
index 0000000..d0eb538
--- /dev/null
+++ b/man/combine_output_data.Rd
@@ -0,0 +1,13 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/combine_output_data.R
+\name{combine_output_data}
+\alias{combine_output_data}
+\title{combine full sim run output data}
+\usage{
+combine_output_data(sim, path)
+}
+\description{
+Combines all the individual compute node model files into
+a few files for the whole simulation
+}
+
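combine_output_data() zips the per-lake tsv files in groups and writes a zip-to-file index so users can find a lake without unpacking everything. The indexing logic in isolation (toy file names; note the groups are 500 files each, despite the '1000 lake groups' comment):

    all_wtr_files = sprintf('sim_lake%04d.tsv', 1:1200)
    splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
    wtemp_zips = paste0('sim_wtemp_', seq_along(splits), '.zip')
    wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
        data.frame(file_index=rep(wtemp_zips[i], length(splits[[i]])),
                   file_name=all_wtr_files[splits[[i]]])
    }))
    ## 1200 rows mapping each tsv to one of three zips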
From ecfa7946bf4048060075887c30ec9415febe6a66 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 5 Aug 2016 16:09:08 -0500
Subject: [PATCH 15/42] version increment

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5aebf92..d30a7d9 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.7
+Version: 4.2.8
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

From fc12cc86192e889451ee95c64a0f70e9854ca106 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Wed, 10 Aug 2016 11:26:29 -0500
Subject: [PATCH 16/42] better handle calibration when no cal data available

---
 DESCRIPTION        |  2 +-
 demo/mpi_hab_out.R | 26 +++++++++++++++++-------
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index d30a7d9..fd00be3 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.8
+Version: 4.2.9
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R
index 12a25c4..3578442 100644
--- a/demo/mpi_hab_out.R
+++ b/demo/mpi_hab_out.R
@@ -46,12 +46,13 @@ Sys.setenv(TZ='GMT')
 
 # clusterEvalQ(c1, Sys.setenv(TZ='GMT'))
 
-future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){
+future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){
 
     library(lakeattributes)
     library(mda.lakes)
     library(dplyr)
     library(glmtools)
+    library(lubridate)
 
     fastdir = tempdir()
     #for use on WiWSC Condor pool
@@ -77,12 +78,17 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri
     #prep observations for calibration data
     data(wtemp)
     obs = filter(wtemp, site_id == nhd_id) %>%
-        transmute(DateTime=date, Depth=depth, temp=wtemp)
+        transmute(DateTime=date, Depth=depth, temp=wtemp) %>%
+        filter(year(DateTime) %in% modern_era)
 
-    #having a weird issue with resample_to_field, make unique
-    obs = obs[!duplicated(obs[,1:2]), ]
+    have_cal = nrow(obs) > 0
 
-    write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+    if(have_cal){
+        #having a weird issue with resample_to_field, make unique
+        obs = obs[!duplicated(obs[,1:2]), ]
+
+        write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+    }
 
 
     #get driver data
@@ -125,8 +131,14 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
-    cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
-    cal_data$site_id = site_id
+    if(have_cal){
+        cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
+        cal_data$site_id = site_id
+        cat('Calibration data calculated\n')
+    }else{
+        cal_data = data.frame() #just use empty data frame if no cal data
+        cat('No Cal, calibration skipped\n')
+    }
 
     unlink(run_dir, recursive=TRUE)
From 8910189f84184bb7558c2740158dfca16c80e309 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 10:30:12 -0500
Subject: [PATCH 17/42] fixup final processing code

---
 DESCRIPTION             | 2 +-
 R/combine_output_data.R | 5 +++++
 demo/mpi_output_data.R  | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index fd00be3..8026921 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.9
+Version: 4.2.10
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index a112bc3..3a54aa6 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -15,6 +15,11 @@ combine_output_data = function(sim, path){
     #this needs about 60GB of scratch space available
     fast_tmp = tempdir()
 
+    #ensure we have a trailing / on path
+    if(!substr(path, nchar(path), nchar(path)) == '/'){
+        path = paste0(path, '/')
+    }
+
     core_path = paste0(path, sim, '/', sim, '_core_metrics.tsv')
     cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
     hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index a435800..c82c325 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -5,7 +5,7 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 
 driver_name = config$drivername
 driver_url = config$driverurl
-out_dir = file.path(config$outdir, driver_name)
+out_dir = config$outdir
 
 
 combine_output_data(driver_name, out_dir)

From c14ed27820b4d0da3541043f12f92b28c974ae59 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 11:02:53 -0500
Subject: [PATCH 18/42] fix jsonlite import

---
 DESCRIPTION             | 2 +-
 NAMESPACE               | 1 +
 R/combine_output_data.R | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8026921..f10a61a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.10
+Version: 4.2.11
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/NAMESPACE b/NAMESPACE
index b279342..b465fe4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -47,6 +47,7 @@ export(set_driver_url)
 export(summarize_notaro)
 import(GLMr)
 import(glmtools)
+import(jsonlite)
 import(lakeattributes)
 import(lubridate)
 import(rLakeAnalyzer)

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 3a54aa6..9bd1fb4 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -7,6 +7,7 @@
 #' @description Combines all the individual compute node model files into
 #' a few files for the whole simulation
 #'
+#' @import jsonlite
 #'
 #'
 #' @export
From 47bde1f99c61c178b255592ccbc9b283a4a129a6 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 11:32:16 -0500
Subject: [PATCH 19/42] fix tempfile location

---
 DESCRIPTION             |  2 +-
 R/combine_output_data.R | 11 ++++++++++-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index f10a61a..4f9d173 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.11
+Version: 4.2.12
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 9bd1fb4..5456fc2 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -14,7 +14,16 @@ combine_output_data = function(sim, path){
     #fast temp location for bundling wtr out data
     #this needs about 60GB of scratch space available
-    fast_tmp = tempdir()
+    #if ram_scratch is there, use it
+    fast_tmp = Sys.getenv('RAM_SCRATCH', unset = '')
+
+    if(fast_tmp == ''){
+        if(file.exists('/cxfs/scratch')){
+            fast_tmp = '/cxfs/scratch'
+        }else{
+            fast_tmp = tempdir()
+        }
+    }
 
     #ensure we have a trailing / on path
     if(!substr(path, nchar(path), nchar(path)) == '/'){
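Patch 19's lookup falls back through an environment variable, a known cluster path, and finally tempdir(). The same precedence chain, condensed:

    fast_tmp = Sys.getenv('RAM_SCRATCH', unset='')
    if(fast_tmp == ''){
        fast_tmp = if(file.exists('/cxfs/scratch')) '/cxfs/scratch' else tempdir()
    }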
From 03bf4cd2683818e6efb14a8659a5a4d70c475e78 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Thu, 11 Aug 2016 11:59:30 -0500
Subject: [PATCH 20/42] try for NLDAS with cal out

---
 DESCRIPTION              |   2 +-
 R/combine_output_data.R  | 115 ++++++++++++++++++++++++---------------
 demo/mpi_hab_out_nldas.R |  31 ++++++++++++++++++++++++++++++-
 3 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4f9d173..04b5913 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.12
+Version: 4.2.13
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 5456fc2..5567bd7 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -34,78 +34,95 @@ combine_output_data = function(sim, path){
     cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
     hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')
 
+    ################################################################################
+    ## read and handle core metrics
     core_metrics = comb_output_table(paste0(path, sim, '/*/best_core_metrics.tsv'),
                                      sep='\t', header=TRUE, as.is=TRUE)
 
     write.table(core_metrics, core_path,
                 sep='\t', row.names=FALSE)
 
+    ################################################################################
+    ## read and handle habitat metrics
     hab_metrics = comb_output_table(paste0(path, sim, '/*/best_hansen_hab.tsv'),
                                     sep='\t', header=TRUE, as.is=TRUE)
     write.table(hab_metrics, hansen_path,
                 sep='\t', row.names=FALSE)
 
+    ################################################################################
+    ###read and handle NML files
     nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
 
-    ###read and handle NML files
-    all_nml = list()
-    for(i in 1:length(nml_files)){
-        load(nml_files[i])
-        all_nml = c(all_nml, model_config)
+    if(length(nml_files) > 0){
+        cat('Wrapping up all nml config.\n')
+
+        all_nml = list()
+        for(i in 1:length(nml_files)){
+            load(nml_files[i])
+            all_nml = c(all_nml, model_config)
+        }
+
+        all_nml = lapply(all_nml, function(x){class(x)='list'; x})
+        writeLines(toJSON(all_nml), cfg_path)
+    }else{
+        cat('Skipping nml config.\n')
     }
 
-    all_nml = lapply(all_nml, function(x){class(x)='list'; x})
-    #save('all_nml', file = paste0(path, sim, '/model_config.Rdata'))
-    writeLines(toJSON(all_nml), cfg_path)
-
+    ################################################################################
     ###read and handle raw water temp data.
     wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*'))
 
-    all_wtr_files = c()
-    wtemp_dir = file.path(fast_tmp, sim)
-    dir.create(wtemp_dir)
-
-    for(i in 1:length(wtr_files)){
-        load(wtr_files[i])
+    if(length(wtr_files) > 0){
+        cat('Wrapping up all raw wtr data.\n')
+        all_wtr_files = c()
+        wtemp_dir = file.path(fast_tmp, sim)
+        dir.create(wtemp_dir)
 
-        newfiles = lapply(dframes, function(df){
-            site_id = df$site_id[1]
-            df$site_id = NULL
-            wtemp_path = paste0(wtemp_dir, '/', sim, '_', site_id, '.tsv')
+        for(i in 1:length(wtr_files)){
+            load(wtr_files[i])
 
-            #the future sim periods were done separately, so they need to be appended
-            if(wtemp_path %in% all_wtr_files){
-                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
-            }else{
-                write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
-            }
+            newfiles = lapply(dframes, function(df){
+                site_id = df$site_id[1]
+                df$site_id = NULL
+                wtemp_path = paste0(wtemp_dir, '/', sim, '_', site_id, '.tsv')
+
+                #the future sim periods were done separately, so they need to be appended
+                if(wtemp_path %in% all_wtr_files){
+                    write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE, append=TRUE, col.names=FALSE)
+                }else{
+                    write.table(df, wtemp_path, sep='\t', row.names=FALSE, quote=FALSE)
+                }
+
+                return(wtemp_path)
+            })
 
-            return(wtemp_path)
-        })
+            all_wtr_files = c(all_wtr_files, newfiles)
+        }
 
-        all_wtr_files = c(all_wtr_files, newfiles)
-    }
-
-    #split up files into 1000 lake groups
-    all_wtr_files = sort(unique(unlist(all_wtr_files)))
-    splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
-
-    wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip'))
-
-    #write an index file for later users
-    wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
-        data.frame(file_index=rep(basename(wtemp_zips)[i], length(splits[[i]])),
-                   file_name=basename(all_wtr_files)[splits[[i]]])
-    }))
-    wtemp_indx = file.path(path, sim, paste0(sim, '_wtemp_index.tsv'))
-    write.table(wtemp_zip_index, wtemp_indx, sep='\t', row.names=FALSE)
-
-
-    for(i in 1:length(splits)){
-        zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+        #split up files into 1000 lake groups
+        all_wtr_files = sort(unique(unlist(all_wtr_files)))
+        splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500))
+
+        wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip'))
+
+        #write an index file for later users
+        wtemp_zip_index = do.call(rbind, lapply(seq_along(splits), function(i){
+            data.frame(file_index=rep(basename(wtemp_zips)[i], length(splits[[i]])),
+                       file_name=basename(all_wtr_files)[splits[[i]]])
+        }))
+        wtemp_indx = file.path(path, sim, paste0(sim, '_wtemp_index.tsv'))
+        write.table(wtemp_zip_index, wtemp_indx, sep='\t', row.names=FALSE)
+
+
+        for(i in 1:length(splits)){
+            zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+        }
+        #delete raw text files to save space
+        unlink(all_wtr_files)
+
+    }else{
+        cat('Skipping raw wtr data.\n')
     }
-    #delete raw text files to save space
-    unlink(all_wtr_files)
 
     # #upload files to SB when done
     # authenticate_sb(user, pass)

diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R
index be32522..72d040c 100644
--- a/demo/mpi_hab_out_nldas.R
+++ b/demo/mpi_hab_out_nldas.R
@@ -74,6 +74,21 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
     #rename for dplyr
     nhd_id = site_id
 
+    #prep observations for calibration data
+    data(wtemp)
+    obs = filter(wtemp, site_id == nhd_id) %>%
+        transmute(DateTime=date, Depth=depth, temp=wtemp) %>%
+        filter(year(DateTime) %in% modern_era)
+
+    have_cal = nrow(obs) > 0
+
+    if(have_cal){
+        #having a weird issue with resample_to_field, make unique
+        obs = obs[!duplicated(obs[,1:2]), ]
+
+        write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE)
+    }
+
     #get driver data
     driver_path = driver_function(site_id)
     driver_path = gsub('\\\\', '/', driver_path)
@@ -114,6 +129,15 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
 
     nml = read_nml(file.path(run_dir, "glm2.nml"))
 
+    ## if we have cal, use it
+    if(have_cal){
+        cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv'))
+        cal_data$site_id = site_id
+        cat('Calibration data calculated\n')
+    }else{
+        cal_data = data.frame() #just use empty data frame if no cal data
+        cat('No Cal, calibration skipped\n')
+    }
 
     unlink(run_dir, recursive=TRUE)
@@ -123,7 +147,8 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
                 hansen_habitat=hansen_habitat,
                 site_id=site_id,
                 #notaro_metrics=notaro_metrics,
-                nml=nml)
+                nml=nml,
+                cal_data=cal_data)
 
     return(all_data)
@@ -173,13 +198,13 @@ wrapup_output = function(out, run_name, years){
     core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]}))
     core_metrics = subset(core_metrics, year %in% years)
 
-    #notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]}))
+    cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]}))
 
     model_config = lapply(good_data, function(x){x$nml})
 
     write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
     write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
-    #write.table(notaro_metrics, file.path(out_dir, 'notaro_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
+    write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists)
 
     save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata')))
     save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata')))
From 759230f8d89a682b49317279f886c5bd23003c6d Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Fri, 12 Aug 2016 09:34:30 -0500
Subject: [PATCH 21/42] add cal wrapup functionality

---
 DESCRIPTION             |  2 +-
 R/combine_output_data.R | 18 ++++++++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 04b5913..3d0a7ab 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.13
+Version: 4.2.14
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 5567bd7..8d92855 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -30,9 +30,10 @@ combine_output_data = function(sim, path){
         path = paste0(path, '/')
     }
 
-    core_path = paste0(path, sim, '/', sim, '_core_metrics.tsv')
-    cfg_path = paste0(path, sim, '/', sim, '_model_config.json')
+    core_path   = paste0(path, sim, '/', sim, '_core_metrics.tsv')
+    cfg_path    = paste0(path, sim, '/', sim, '_model_config.json')
     hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv')
+    cal_path    = paste0(path, sim, '/', sim, '_calibration_data.tsv')
 
     ################################################################################
     ## read and handle core metrics
@@ -49,6 +50,19 @@ combine_output_data = function(sim, path){
     write.table(hab_metrics, hansen_path,
                 sep='\t', row.names=FALSE)
 
+
+    ################################################################################
+    ## read and handle calibration data
+    if(length(Sys.glob(paste0(path, sim, '/*/best_cal_data.tsv')))){
+        cat('Cal wrapup running.\n')
+        cal_data = comb_output_table(paste0(path, sim, '/*/best_cal_data.tsv'),
+                                     sep='\t', header=TRUE, as.is=TRUE)
+        write.table(cal_data, cal_path,
+                    sep='\t', row.names=FALSE)
+    }else{
+        cat('Skipping cal wrapup because no cal data.\n')
+    }
+
     ################################################################################
     ###read and handle NML files
     nml_files = Sys.glob(paste0(path, sim, '/*/model_config.Rdata*'))
From 6c7072d0c770f73062ae8d84fea28aa2da85d772 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 12:07:50 -0500
Subject: [PATCH 22/42] change scratch dir

---
 DESCRIPTION             |  2 +-
 R/combine_output_data.R | 13 +------------
 demo/mpi_output_data.R  | 20 +++++++++++++++++++-
 3 files changed, 21 insertions(+), 14 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 3d0a7ab..b981048 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.14
+Version: 4.2.15
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 8d92855..902cb0e 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -11,19 +11,8 @@
 #'
 #'
 #' @export
-combine_output_data = function(sim, path){
-    #fast temp location for bundling wtr out data
-    #this needs about 60GB of scratch space available
-    #if ram_scratch is there, use it
-    fast_tmp = Sys.getenv('RAM_SCRATCH', unset = '')
+combine_output_data = function(sim, path, fast_tmp=tempdir()){
 
-    if(fast_tmp == ''){
-        if(file.exists('/cxfs/scratch')){
-            fast_tmp = '/cxfs/scratch'
-        }else{
-            fast_tmp = tempdir()
-        }
-    }
 
     #ensure we have a trailing / on path
     if(!substr(path, nchar(path), nchar(path)) == '/'){

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index c82c325..4ffd72d 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -6,7 +6,25 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 driver_name = config$drivername
 driver_url = config$driverurl
 out_dir = config$outdir
+scratch_dir = config$scratch
+
+#scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
+dir.create(scratch_dir)
+
+combine_output_data(driver_name, out_dir, fast_tmp=scratch_dir)
+
+# #fast temp location for bundling wtr out data
+# #this needs about 60GB of scratch space available
+# #if ram_scratch is there, use it
+# fast_tmp = Sys.getenv('RAM_SCRATCH', unset = '')
+#
+# if(fast_tmp == ''){
+#     if(file.exists('/cxfs/scratch')){
+#         fast_tmp = '/cxfs/scratch'
+#     }else{
+#         fast_tmp = tempdir()
+#     }
+# }
 
-combine_output_data(driver_name, out_dir)
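With patch 22 the scratch location becomes a caller decision via the new fast_tmp argument (default tempdir()). An example call, with purely illustrative paths:

    combine_output_data('GFDL', '/projects/lakes/output/', fast_tmp='/cxfs/scratch/tmp')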
From d1fa53f161628c124842636d9e6a1c44aa824ec6 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 12:21:43 -0500
Subject: [PATCH 23/42] fix issue with zero length files

---
 DESCRIPTION           | 2 +-
 R/comb_output_table.R | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b981048..b05880a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.15
+Version: 4.2.16
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/R/comb_output_table.R b/R/comb_output_table.R
index 70888b8..25998ca 100644
--- a/R/comb_output_table.R
+++ b/R/comb_output_table.R
@@ -20,8 +20,10 @@ comb_output_table = function(pattern, ...){
     out = data.frame()
 
     for(i in 1:length(files)){
-        tmp = read.table(files[i], ...)
-        out = rbind(out, tmp)
+        if(file.info(files[i])$size > 0){
+            tmp = read.table(files[i], ...)
+            out = rbind(out, tmp)
+        }
     }
 
     return(out)
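The size check in patch 23 guards against what is likely a side effect of patch 16's empty cal_data frames: a node whose results held no rows writes a zero-byte tsv, and read.table() errors on a zero-length file instead of returning an empty table. The same guard, standalone:

    files = Sys.glob('*/best_cal_data.tsv')
    nonempty = files[file.info(files)$size > 0]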
From 5933e558a1b8fdd4b38cbdc73f62a80b039ff3ce Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 12:31:11 -0500
Subject: [PATCH 24/42] scratch scratch scratch

---
 DESCRIPTION            | 2 +-
 demo/mpi_output_data.R | 7 +++++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index b05880a..206e1b7 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: mda.lakes
 Type: Package
 Title: Tools for combining models, data, and processing for lakes
-Version: 4.2.16
+Version: 4.2.17
 Date: 2015-12-03
 Author: Luke Winslow, Jordan Read
 Maintainer: Luke Winslow

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index 4ffd72d..d1587a5 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -6,9 +6,12 @@ config = read.table('config', header=TRUE, as.is=TRUE)
 driver_name = config$drivername
 driver_url = config$driverurl
 out_dir = config$outdir
-scratch_dir = config$scratch
 
-#scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
+scratch_dir = Sys.getenv('GLOBAL_SCRATCH', unset=config$scratch)
+if(is.null(scratch_dir) || scratch_dir==''){
+    scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
+}
+
 dir.create(scratch_dir)
 
 combine_output_data(driver_name, out_dir, fast_tmp=scratch_dir)

From 8d993cfa0301ca4a282d551a2a843721a86e0494 Mon Sep 17 00:00:00 2001
From: Luke Winslow
Date: Mon, 15 Aug 2016 14:35:32 -0500
Subject: [PATCH 25/42] fix NLDAS data handling

---
 R/combine_output_data.R  | 2 +-
 demo/mpi_hab_out_nldas.R | 1 +
 demo/mpi_output_data.R   | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/combine_output_data.R b/R/combine_output_data.R
index 902cb0e..311f113 100644
--- a/R/combine_output_data.R
+++ b/R/combine_output_data.R
@@ -118,7 +118,7 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){
 
         for(i in 1:length(splits)){
-            zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]])
+            zip(zipfile=wtemp_zips[i], files=all_wtr_files[splits[[i]]], zip='zip')
         }
         #delete raw text files to save space
         unlink(all_wtr_files)

diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R
index 72d040c..f4f2e06 100644
--- a/demo/mpi_hab_out_nldas.R
+++ b/demo/mpi_hab_out_nldas.R
@@ -52,6 +52,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func
     library(mda.lakes)
     library(dplyr)
     library(glmtools)
+    library(lubridate)
 
     fastdir = tempdir()
     #for use on WiWSC Condor pool

diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R
index d1587a5..6dbb6b3 100644
--- a/demo/mpi_output_data.R
+++ b/demo/mpi_output_data.R
@@ -8,6 +8,7 @@ driver_url = config$driverurl
 out_dir = config$outdir
 
 scratch_dir = Sys.getenv('GLOBAL_SCRATCH', unset=config$scratch)
+#base case just in case
 if(is.null(scratch_dir) || scratch_dir==''){
     scratch_dir = '/cxfs/projects/usgs/water/owi/lwinslow/scratch/'
 }
Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 15 Aug 2016 16:32:22 -0500 Subject: [PATCH 26/42] add more logging to sim runs --- DESCRIPTION | 2 +- demo/mpi_hab_out.R | 7 +++++-- demo/mpi_hab_out_nldas.R | 5 ++++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 206e1b7..567f159 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.17 +Version: 4.2.18 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index 3578442..221e9c0 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -69,7 +69,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) - cat(run_dir, '\n') + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') dir.create(run_dir) #rename for dplyr @@ -151,10 +151,13 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri nml=nml, cal_data=cal_data) + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(all_data) }, error=function(e){ - unlink(run_dir, recursive=TRUE); + unlink(run_dir, recursive=TRUE) + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') return(list(error=e, site_id)) }) } diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index f4f2e06..f734ce5 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -69,7 +69,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) - cat(run_dir, '\n') + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') dir.create(run_dir) #rename for dplyr @@ -151,10 +151,13 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, future_era, driver_func nml=nml, cal_data=cal_data) + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(all_data) }, error=function(e){ unlink(run_dir, recursive=TRUE); + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') return(list(error=e, site_id)) }) } From 963e96afcaeca858a7a81665edbb6b48cb4dac61 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:35:09 -0500 Subject: [PATCH 27/42] try different MPI style --- demo/mpi_hab_out.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index 221e9c0..2eefc77 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -17,11 +17,15 @@ # mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_4.1.0.tar.gz'))}) -library(Rmpi) +#library(Rmpi) -args = commandArgs(trailingOnly=TRUE) -mpirank = mpi.comm.rank(0) -mpisize = mpi.comm.size(0) +#args = commandArgs(trailingOnly=TRUE) +mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) +mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) + +if(mpirank == 'NA' || mpisize == 'NA'){ + stop('trouble finding MPIRANK or MPISIZE') +} # if(mpi.comm.rank(0) != 0){ From 774752a2c822cf40e9568a6aa0871a7b3ef91f22 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:35:46 -0500 Subject: [PATCH 28/42] for 
NLDAS --- demo/mpi_hab_out_nldas.R | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index f734ce5..be272b6 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -17,11 +17,15 @@ # mdalakes_install = clusterCall(c1, function(){install_url(paste0('http://', local_url,'/mda.lakes_4.1.0.tar.gz'))}) -library(Rmpi) +#library(Rmpi) -args = commandArgs(trailingOnly=TRUE) -mpirank = mpi.comm.rank(0) -mpisize = mpi.comm.size(0) +#args = commandArgs(trailingOnly=TRUE) +mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) +mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) + +if(mpirank == 'NA' || mpisize == 'NA'){ + stop('trouble finding MPIRANK or MPISIZE') +} # if(mpi.comm.rank(0) != 0){ From 37a8c1d360d84aeec824494c765fb45270964d22 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:42:14 -0500 Subject: [PATCH 29/42] fix numeric type --- demo/mpi_hab_out.R | 6 +++--- demo/mpi_hab_out_nldas.R | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index 2eefc77..b7face6 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -20,10 +20,10 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) -mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) +mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) -if(mpirank == 'NA' || mpisize == 'NA'){ +if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') } diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index be272b6..ed20a21 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -20,10 +20,10 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = Sys.getenv('SLURM_PROCID', 'NA')#mpi.comm.rank(0) -mpisize = Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA') #mpi.comm.size(0) +mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) -if(mpirank == 'NA' || mpisize == 'NA'){ +if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') } From 7f7ab383dde7bfb7a644f7b74717664ff0b040e0 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:45:58 -0500 Subject: [PATCH 30/42] for NLDAS --- demo/mpi_hab_out.R | 2 ++ demo/mpi_hab_out_nldas.R | 2 ++ 2 files changed, 4 insertions(+) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index b7face6..ba995bf 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -22,6 +22,8 @@ #args = commandArgs(trailingOnly=TRUE) mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +cat('MPIRANK:', mpirank, '\n') +cat('MPISIZE:', mpisize, '\n') if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index ed20a21..7fb0ebe 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -22,6 +22,8 @@ #args = commandArgs(trailingOnly=TRUE) mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +cat('MPIRANK:', mpirank, '\n') +cat('MPISIZE:', mpisize, '\n') 
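# Patches 27-28 above swap Rmpi for SLURM environment variables; patch 31
# below fixes the coercion (as.numeric, not is.numeric). For reference, a
# minimal standalone sketch of the pattern the series lands on
# (slurm_rank_size() is an illustrative helper, not part of mda.lakes; it
# assumes srun sets SLURM_PROCID and SLURM_STEP_NUM_TASKS):
slurm_rank_size = function(){
  # as.numeric() turns the unset sentinel 'NA' into NA (with a coercion warning)
  r = suppressWarnings(as.numeric(Sys.getenv('SLURM_PROCID', 'NA')))
  s = suppressWarnings(as.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')))
  if(is.na(r) || is.na(s)) stop('trouble finding MPIRANK or MPISIZE')
  list(rank = r, size = s)  # rank is 0-based; size is the task count in the step
}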
if(is.na(mpirank)|| is.na(mpisize)){ stop('trouble finding MPIRANK or MPISIZE') From cc389bb48112b87cdd3462ea7dc94156f5c80317 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Mon, 22 Aug 2016 12:48:59 -0500 Subject: [PATCH 31/42] fix usage of is.numeric --- demo/mpi_hab_out.R | 4 ++-- demo/mpi_hab_out_nldas.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/demo/mpi_hab_out.R b/demo/mpi_hab_out.R index ba995bf..b8d19e5 100644 --- a/demo/mpi_hab_out.R +++ b/demo/mpi_hab_out.R @@ -20,8 +20,8 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) -mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +mpirank = as.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = as.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) cat('MPIRANK:', mpirank, '\n') cat('MPISIZE:', mpisize, '\n') diff --git a/demo/mpi_hab_out_nldas.R b/demo/mpi_hab_out_nldas.R index 7fb0ebe..96994c8 100644 --- a/demo/mpi_hab_out_nldas.R +++ b/demo/mpi_hab_out_nldas.R @@ -20,8 +20,8 @@ #library(Rmpi) #args = commandArgs(trailingOnly=TRUE) -mpirank = is.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) -mpisize = is.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) +mpirank = as.numeric(Sys.getenv('SLURM_PROCID', 'NA')) #mpi.comm.rank(0) +mpisize = as.numeric(Sys.getenv('SLURM_STEP_NUM_TASKS', 'NA')) #mpi.comm.size(0) cat('MPIRANK:', mpirank, '\n') cat('MPISIZE:', mpisize, '\n') From 061c45384994055b4923b8e079f58edaeaea3f5d Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 25 Aug 2016 14:30:07 -0500 Subject: [PATCH 32/42] new job running script --- demo/slurm_array_hab_out_god_mode.R | 259 ++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 demo/slurm_array_hab_out_god_mode.R diff --git a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R new file mode 100644 index 0000000..0f99553 --- /dev/null +++ b/demo/slurm_array_hab_out_god_mode.R @@ -0,0 +1,259 @@ +#slurm_array_hab_out_god_mode +## This uses SLURM array running to run individual lakes +## MPI has been too troublesome + +#first load some modules: +# module load tools/netcdf-4.3.2-gnu + + +library(lakeattributes) +library(mda.lakes) +library(dplyr) +library(glmtools) +source(system.file('demo/common_running_functions.R', package='mda.lakes')) + +Sys.setenv(TZ='GMT') + +# clusterEvalQ(c1, Sys.setenv(TZ='GMT')) + + +future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){ + + library(lakeattributes) + library(mda.lakes) + library(dplyr) + library(glmtools) + library(lubridate) + + fastdir = tempdir() + #for use on WiWSC Condor pool + if(file.exists('/mnt/ramdisk')){ + fastdir = '/mnt/ramdisk' + } + #for use on YETI + if(Sys.getenv('RAM_SCRATCH', unset = '') != ''){ + fastdir = Sys.getenv('RAM_SCRATCH', unset = '') + } + + + tryCatch({ + + run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + dir.create(run_dir) + + #rename for dplyr + nhd_id = site_id + + #prep observations for calibration data + data(wtemp) + obs = filter(wtemp, site_id == nhd_id) %>% + transmute(DateTime=date, Depth=depth, temp=wtemp) %>% + filter(year(DateTime) %in% modern_era) + + have_cal = nrow(obs) > 0 + + if(have_cal){ + #having a weird issue with 
resample_to_field, make unique + obs = obs[!duplicated(obs[,1:2]), ] + + write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE) + } + + + #get driver data + driver_path = driver_function(site_id) + driver_path = gsub('\\\\', '/', driver_path) + + + kd_avg = secchi_function(site_id) #secchi_conv/mean(kds$secchi_avg, na.rm=TRUE) + + #run with different driver and ice sources + + prep_run_glm_kd(site_id=site_id, + path=run_dir, + years=modern_era, + kd=kd_avg, + nml_args=c(list( + dt=3600, subdaily=FALSE, nsave=24, + timezone=-6, + csv_point_nlevs=0, + snow_albedo_factor=1.1, + meteo_fl=driver_path, + cd=getCD(site_id, method='Hondzo')), + nml_args)) + + + ##parse the habitat and WTR info. next run will clobber output.nc + wtr_all = get_temp(file.path(run_dir, 'output.nc'), reference='surface') + ## drop the first n burn-in years + #years = as.POSIXlt(wtr$DateTime)$year + 1900 + #to_keep = !(years <= min(years) + nburn - 1) + #wtr_all = wtr[to_keep, ] + + core_metrics = necsc_thermal_metrics_core(run_dir, site_id) + + hansen_habitat = hansen_habitat_calc(run_dir, site_id) + + notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc')) + + nml = read_nml(file.path(run_dir, "glm2.nml")) + + if(have_cal){ + cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv')) + cal_data$site_id = site_id + cat('Calibration data calculated\n') + }else{ + cal_data = data.frame() #just use empty data frame if no cal data + cat('No Cal, calibration skipped\n') + } + + unlink(run_dir, recursive=TRUE) + + notaro_metrics$site_id = site_id + + all_data = list(wtr=wtr_all, core_metrics=core_metrics, + hansen_habitat=hansen_habitat, + site_id=site_id, + notaro_metrics=notaro_metrics, + nml=nml, + cal_data=cal_data) + + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + + return(all_data) + + }, error=function(e){ + unlink(run_dir, recursive=TRUE) + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(list(error=e, site_id)) + }) +} + + + +getnext = function(fname){ + i=0 + barefname = fname + while(file.exists(fname)){ + i=i+1 + fname = paste0(barefname, '.', i) + } + return(fname) +} + +wrapup_output = function(out, out_dir, years){ + + run_exists = file.exists(out_dir) + + if(!run_exists) {dir.create(out_dir, recursive=TRUE)} + + good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + + sprintf('%i lakes ran\n', length(good_data)) + dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) + #drop the burn-in years + dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) + + hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) + hansen_habitat = subset(hansen_habitat, year %in% years) + + core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) + core_metrics = subset(core_metrics, year %in% years) + + notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) + + cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) + + model_config = lapply(good_data, function(x){x$nml}) + + notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) + write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE,
append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) + write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + + + save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) + save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) + save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + + rm(out, good_data, dframes) + gc() +} + + +################################################################################ +## Let's run downscaled climate runs 1981-2000, 2040-2059, 2080-2099 +################################################################################ +gcm_driver_fun = function(site_id, dname){ + drivers = read.csv(get_driver_path(paste0(site_id, ''), driver_name = dname, timestep = 'daily'), header=TRUE) + #nldas = read.csv(get_driver_path(paste0(site_id, ''), driver_name = 'NLDAS'), header=TRUE) + #drivers = driver_nldas_debias_airt_sw(drivers, nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + driver_save(drivers) +} + +nldas_driver_fun = function(site_id, dname){ + nldas = read.csv(get_driver_path(site_id, driver_name = dname), header=TRUE) + drivers = driver_nldas_wind_debias(nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + #fix the 2-day offset in NLDAS data + drivers$time = drivers$time + as.difftime(-2, units='days') + driver_save(drivers) +} + +#This should be 1 to n, where n may be larger than total number of lakes to model +task_id = as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID', 'NA')) + +if(is.na(task_id)){ + stop("ERROR Can not read task_id, NA returned") +} + +task_offset = as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID_offset', 'NA')) +if(is.na(task_offset)){ + task_offset = 0 +} +task_id = task_id + task_offset + +config = read.table('config', header=TRUE, as.is=TRUE) + +driver_name = config$drivername +driver_url = config$driverurl +out_dir = file.path(config$outdir, driver_name) +set_driver_url(driver_url) + +if(driver_name == 'NLDAS'){ + driver_fun = nldas_driver_fun + yeargroups = list(1979:2015) +}else{ + driver_fun = gcm_driver_fun + yeargroups = list(1981:2000, 2040:2059, 2080:2099) +} + +to_run = as.character(unique(zmax$site_id)) +if(task_id > length(to_run)){ + cat(sprintf('Skipping task_id:%i because greater than number of lakes to run\n', task_id)) + q(save='no', status=0) +} + +site_id = to_run[task_id] + + +for(ygroup in yeargroups){ + start = Sys.time() + out = lapply(site_id, future_hab_wtr, + modern_era=ygroup, + secchi_function=secchi_standard, + driver_function=function(site_id){driver_fun(site_id, driver_name)}) + + wrapup_output(out, file.path(out_dir, site_id), years=ygroup) + + print(difftime(Sys.time(), start, units='hours')) + cat('on to the next\n') +} + + From 05bb77f99a9e7c8841c524ae411fea78b8e6fb09 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 25 Aug 2016 15:23:20 -0500 Subject: [PATCH 33/42] change albedo multiplier --- demo/slurm_array_hab_out_god_mode.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git
a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R index 0f99553..5296f0b 100644 --- a/demo/slurm_array_hab_out_god_mode.R +++ b/demo/slurm_array_hab_out_god_mode.R @@ -78,7 +78,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri dt=3600, subdaily=FALSE, nsave=24, timezone=-6, csv_point_nlevs=0, - snow_albedo_factor=1.1, + snow_albedo_factor=1, meteo_fl=driver_path, cd=getCD(site_id, method='Hondzo')), nml_args)) From e9f332053d150b4421dbc3d941f3344fcc68244f Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 25 Aug 2016 15:24:26 -0500 Subject: [PATCH 34/42] version increment --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 567f159..d2c3c83 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.18 +Version: 4.2.19 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow From 1fa014378fad8a1285625cad3690246b70ebda5f Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Sat, 27 Aug 2016 09:04:05 -0500 Subject: [PATCH 35/42] change albedo factor to fix late off bias --- demo/slurm_array_hab_out_god_mode.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R index 5296f0b..297742a 100644 --- a/demo/slurm_array_hab_out_god_mode.R +++ b/demo/slurm_array_hab_out_god_mode.R @@ -78,7 +78,7 @@ future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_dri dt=3600, subdaily=FALSE, nsave=24, timezone=-6, csv_point_nlevs=0, - snow_albedo_factor=1, + snow_albedo_factor=0.85, meteo_fl=driver_path, cd=getCD(site_id, method='Hondzo')), nml_args)) From fa08b457acb5f7b84874a9a536e78cd2254c9767 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Sat, 27 Aug 2016 09:04:26 -0500 Subject: [PATCH 36/42] version increment --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index d2c3c83..f7bcd95 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.19 +Version: 4.2.20 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow From 3d3162afd84a5880ea8a8763e0bca4b0817a56e3 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Thu, 1 Sep 2016 16:07:59 -0500 Subject: [PATCH 37/42] increase size of zipfiles --- R/combine_output_data.R | 2 +- man/combine_output_data.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/combine_output_data.R b/R/combine_output_data.R index 311f113..2e3059e 100644 --- a/R/combine_output_data.R +++ b/R/combine_output_data.R @@ -104,7 +104,7 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){ #split up files into 1000 lake groups all_wtr_files = sort(unique(unlist(all_wtr_files))) - splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/500)) + splits = split(1:length(all_wtr_files), floor((1:length(all_wtr_files))/1000)) wtemp_zips = file.path(path, sim, paste0(sim, '_wtemp_', seq_along(splits), '.zip')) diff --git a/man/combine_output_data.Rd b/man/combine_output_data.Rd index d0eb538..7747dda 100644 --- a/man/combine_output_data.Rd +++ b/man/combine_output_data.Rd @@ -4,7 +4,7 @@ \alias{combine_output_data} \title{combine full sim run output data} \usage{ -combine_output_data(sim, path) 
+combine_output_data(sim, path, fast_tmp = tempdir()) } \description{ Combines all the individual compute node model files into From fef2d0118090fe2d1200fce178626528032d0db1 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Tue, 6 Sep 2016 13:31:45 -0500 Subject: [PATCH 38/42] sb archive and fix index dl --- DESCRIPTION | 2 +- NAMESPACE | 2 ++ R/get_driver_nhd.R | 4 +++ R/sb_archive_model.R | 54 +++++++++++++++++++++++++++++++++++++++++ man/sb_archive_model.Rd | 21 ++++++++++++++++ 5 files changed, 82 insertions(+), 1 deletion(-) create mode 100644 R/sb_archive_model.R create mode 100644 man/sb_archive_model.Rd diff --git a/DESCRIPTION b/DESCRIPTION index f7bcd95..ab134c5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.20 +Version: 4.2.21 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/NAMESPACE b/NAMESPACE index b465fe4..b7f3777 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(necsc_thermal_metrics_core) export(opti_thermal_habitat) export(populate_base_lake_nml) export(prep_run_glm_kd) +export(sb_archive_model) export(sens_seasonal_site) export(set_driver_url) export(summarize_notaro) @@ -51,6 +52,7 @@ import(jsonlite) import(lakeattributes) import(lubridate) import(rLakeAnalyzer) +import(sbtools) import(tidyr) importFrom(accelerometry,rle2) importFrom(insol,JD) diff --git a/R/get_driver_nhd.R b/R/get_driver_nhd.R index 7af6fb8..6aee4a8 100644 --- a/R/get_driver_nhd.R +++ b/R/get_driver_nhd.R @@ -106,6 +106,10 @@ get_driver_index = function(driver_name, loc_cache=TRUE){ if(substr(pkg_info$dvr_url, nchar(pkg_info$dvr_url)-3,nchar(pkg_info$dvr_url)) == '.zip'){ unzip(pkg_info$dvr_url, files = paste0('drivers_GLM_', driver_name, '/driver_index.tsv'), exdir=dirname(dest), junkpaths=TRUE) + }else if(substr(index_url, 1,7) == 'file://'){ + + dest = index_url + }else{ if(!download_helper(index_url, dest)){ stop('driver_index.tsv: unable to download for driver data:', driver_name) diff --git a/R/sb_archive_model.R b/R/sb_archive_model.R new file mode 100644 index 0000000..04d2b36 --- /dev/null +++ b/R/sb_archive_model.R @@ -0,0 +1,54 @@ +#' @title Upload and archive model to ScienceBase +#' +#' @description +#' Creates relevant child items and uploads the model output files to ScienceBase +#' +#' @param path Path to folder containing sim files +#' @param sb_root SB root item ID to create new items under +#' @param sbuser SB user name +#' @param sbpass SB password +#' +#' @import sbtools +#' +#' @export +sb_archive_model = function(path, sb_root, sbuser, sbpass){ + + sim = basename(path) + allfiles = dir(path) + ndone = 0 + pb = txtProgressBar(min = 0, max = length(allfiles)) + + authenticate_sb(sbuser, sbpass) + itm_title = paste0('Simulated lake temp metrics for ', sim) + sim_itm = item_create(parent_id=sb_root, title=itm_title) + + ## core metrics create/upload + core_met = item_create(parent_id=sim_itm, title=paste0(sim, ':Core thermal metrics')) + files = Sys.glob(file.path(path, '*_core_metrics.tsv')) + item_append_files(core_met, files) + setTxtProgressBar(pb, (ndone <- ndone+1)) + + ## fish habitat + fish_hab = item_create(parent_id=sim_itm, title=paste0(sim, ':Fish habitat metrics')) + files = Sys.glob(file.path(path, '*_fish_hab.tsv')) + item_append_files(fish_hab, files) + setTxtProgressBar(pb, (ndone <- ndone+1)) + + ## model configuration + mod_conf = item_create(parent_id=sim_itm, title=paste0(sim, ':Model configuration input')) + files =
Sys.glob(file.path(path, '*_model_config.json')) + item_append_files(mod_conf, files) + setTxtProgressBar(pb, (ndone <- ndone+1)) + + + cat('starting wtr files upload...') + ## water temp files + wtr_raw = item_create(parent_id=sim_itm, title=paste0(sim, ':Raw water temperature')) + files = Sys.glob(file.path(path, '*_wtemp_*')) + for(i in 1:length(files)){ + item_append_files(wtr_raw, files=files[i]) + setTxtProgressBar(pb, (ndone <- ndone+1)) + } + + close(pb) +} diff --git a/man/sb_archive_model.Rd b/man/sb_archive_model.Rd new file mode 100644 index 0000000..e03c56a --- /dev/null +++ b/man/sb_archive_model.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/sb_archive_model.R +\name{sb_archive_model} +\alias{sb_archive_model} +\title{Upload and archive model to ScienceBase} +\usage{ +sb_archive_model(path, sb_root, sbuser, sbpass) +} +\arguments{ +\item{path}{Path to folder containing sim files} + +\item{sb_root}{SB root item ID to create new items under} + +\item{sbuser}{SB user name} + +\item{sbpass}{SB password} +} +\description{ +Creates relevant child items and uploads the model output files to ScienceBase +} + From df601e0eb6d5f89e251c9c057d874526acea9427 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 10:22:33 -0500 Subject: [PATCH 39/42] combine error files on finish as well --- DESCRIPTION | 2 +- R/combine_output_data.R | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ab134c5..7adcf5c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.21 +Version: 4.2.22 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/R/combine_output_data.R b/R/combine_output_data.R index 2e3059e..f956076 100644 --- a/R/combine_output_data.R +++ b/R/combine_output_data.R @@ -23,6 +23,7 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){ cfg_path = paste0(path, sim, '/', sim, '_model_config.json') hansen_path = paste0(path, sim, '/', sim, '_fish_hab.tsv') cal_path = paste0(path, sim, '/', sim, '_calibration_data.tsv') + error_path = paste0(path, sim, '/', sim, '_error_output.tsv') ################################################################################ ## read and handle core metrics @@ -71,6 +72,22 @@ combine_output_data = function(sim, path, fast_tmp=tempdir()){ cat('Skipping nml config.\n') } + ################################################################################ + ### read and handle error files + bad_files = Sys.glob(paste0(path, sim, '/*/bad_data.Rdata*')) + + bad_data = list() + + for(i in 1:length(bad_files)){ + tmp = new.env() + load(bad_files[i], envir = tmp) + + bad_data = c(bad_data, tmp$bad_data) + } + + save(bad_data, file=error_path) + + ################################################################################ ###read and handle raw water temp data.
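# The error-file pass above loads each bad_data.Rdata into a fresh environment
# so successive load() calls never clobber local state. A self-contained
# sketch of that idiom (collect_rdata() is an illustrative name, not an
# mda.lakes function):
collect_rdata = function(files, name = 'bad_data'){
  acc = list()
  for(f in files){        # a zero-length files vector skips the loop cleanly
    e = new.env()
    load(f, envir = e)    # populates e[[name]] from the .Rdata file
    acc = c(acc, get(name, envir = e))
  }
  acc
}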
wtr_files = Sys.glob(paste0(path, sim, '/*/best_all_wtr.Rdata*')) From a4d75440edf91015bd5c286c82e8bb24198d11dd Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 11:16:14 -0500 Subject: [PATCH 40/42] change organization of running code --- DESCRIPTION | 2 +- R/run_necsc_lake.R | 240 ++++++++++++++++++++++++++ R/slurm_array_cleanup_missing_lakes.R | 31 ++++ demo/slurm_array_hab_out_god_mode.R | 225 +----------------------- 4 files changed, 276 insertions(+), 222 deletions(-) create mode 100644 R/run_necsc_lake.R create mode 100644 R/slurm_array_cleanup_missing_lakes.R diff --git a/DESCRIPTION b/DESCRIPTION index 7adcf5c..30c7178 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: mda.lakes Type: Package Title: Tools for combining models, data, and processing for lakes -Version: 4.2.22 +Version: 4.3.0 Date: 2015-12-03 Author: Luke Winslow, Jordan Read Maintainer: Luke Winslow diff --git a/R/run_necsc_lake.R b/R/run_necsc_lake.R new file mode 100644 index 0000000..810cbc2 --- /dev/null +++ b/R/run_necsc_lake.R @@ -0,0 +1,240 @@ + + + +#' @title Large wrapper function NECSC mod run +#' +#' @description +#' Runs a single NECSC lake given the default configuration for +#' both NLDAS and Notaro drivers +#' +#' @export +run_necsc_lake = function(site_id = NA, driver_name, out_dir){ + + if(is.na(site_id)){ + stop("ERROR site_id cannot be NA") + } + + library(lakeattributes) + library(mda.lakes) + library(dplyr) + library(glmtools) + source(system.file('demo/common_running_functions.R', package='mda.lakes')) + + Sys.setenv(TZ='GMT') + + + future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){ + + library(lakeattributes) + library(mda.lakes) + library(dplyr) + library(glmtools) + library(lubridate) + + fastdir = tempdir() + #for use on WiWSC Condor pool + if(file.exists('/mnt/ramdisk')){ + fastdir = '/mnt/ramdisk' + } + #for use on YETI + if(Sys.getenv('RAM_SCRATCH', unset = '') != ''){ + fastdir = Sys.getenv('RAM_SCRATCH', unset = '') + } + + + tryCatch({ + + run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) + cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + dir.create(run_dir) + + #rename for dplyr + nhd_id = site_id + + #prep observations for calibration data + data(wtemp) + obs = filter(wtemp, site_id == nhd_id) %>% + transmute(DateTime=date, Depth=depth, temp=wtemp) %>% + filter(year(DateTime) %in% modern_era) + + have_cal = nrow(obs) > 0 + + if(have_cal){ + #having a weird issue with resample_to_field, make unique + obs = obs[!duplicated(obs[,1:2]), ] + + write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE) + } + + + #get driver data + driver_path = driver_function(site_id) + driver_path = gsub('\\\\', '/', driver_path) + + + kd_avg = secchi_function(site_id) #secchi_conv/mean(kds$secchi_avg, na.rm=TRUE) + + #run with different driver and ice sources + + prep_run_glm_kd(site_id=site_id, + path=run_dir, + years=modern_era, + kd=kd_avg, + nml_args=c(list( + dt=3600, subdaily=FALSE, nsave=24, + timezone=-6, + csv_point_nlevs=0, + snow_albedo_factor=0.85, + meteo_fl=driver_path, + cd=getCD(site_id, method='Hondzo')), + nml_args)) + + + ##parse the habitat and WTR info. 
next run will clobber output.nc + wtr_all = get_temp(file.path(run_dir, 'output.nc'), reference='surface') + ## drop the first n burn-in years + #years = as.POSIXlt(wtr$DateTime)$year + 1900 + #to_keep = !(years <= min(years) + nburn - 1) + #wtr_all = wtr[to_keep, ] + + core_metrics = necsc_thermal_metrics_core(run_dir, site_id) + + hansen_habitat = hansen_habitat_calc(run_dir, site_id) + + notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc')) + + nml = read_nml(file.path(run_dir, "glm2.nml")) + + if(have_cal){ + cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv')) + cal_data$site_id = site_id + cat('Calibration data calculated\n') + }else{ + cal_data = data.frame() #just use empty data frame if no cal data + cat('No Cal, calibration skipped\n') + } + + unlink(run_dir, recursive=TRUE) + + notaro_metrics$site_id = site_id + + all_data = list(wtr=wtr_all, core_metrics=core_metrics, + hansen_habitat=hansen_habitat, + site_id=site_id, + notaro_metrics=notaro_metrics, + nml=nml, + cal_data=cal_data) + + cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + + return(all_data) + + }, error=function(e){ + unlink(run_dir, recursive=TRUE) + cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') + return(list(error=e, site_id)) + }) + } + + + + getnext = function(fname){ + i=0 + barefname = fname + while(file.exists(fname)){ + i=i+1 + fname = paste0(barefname, '.', i) + } + return(fname) + } + + wrapup_output = function(out, out_dir, years){ + + run_exists = file.exists(out_dir) + + if(!run_exists) {dir.create(out_dir, recursive=TRUE)} + + good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] + + sprintf('%i lakes ran\n', length(good_data)) + dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) + #drop the burn-in years + dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) + + hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) + hansen_habitat = subset(hansen_habitat, year %in% years) + + core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) + core_metrics = subset(core_metrics, year %in% years) + + notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) + + cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) + + model_config = lapply(good_data, function(x){x$nml}) + + notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) + write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) + write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + + + save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) + save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) + save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + + rm(out,
good_data, dframes) + gc() + } + + + ################################################################################ + ## Lets run Downscaled climate runs 1980-1999, 2020-2039, 2080:2099 + ################################################################################ + gcm_driver_fun = function(site_id, dname){ + drivers = read.csv(get_driver_path(paste0(site_id, ''), driver_name = dname, timestep = 'daily'), header=TRUE) + #nldas = read.csv(get_driver_path(paste0(site_id, ''), driver_name = 'NLDAS'), header=TRUE) + #drivers = driver_nldas_debias_airt_sw(drivers, nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + driver_save(drivers) + } + + nldas_driver_fun = function(site_id, dname){ + nldas = read.csv(get_driver_path(site_id, driver_name = dname), header=TRUE) + drivers = driver_nldas_wind_debias(nldas) + drivers = driver_add_burnin_years(drivers, nyears=2) + drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off + #fix the 2-day offset in NLDAS data + drivers$time = drivers$time + as.difftime(-2, units='days') + driver_save(drivers) + } + + + + if(driver_name == 'NLDAS'){ + driver_fun = nldas_driver_fun + yeargroups = list(1979:2015) + }else{ + driver_fun = gcm_driver_fun + yeargroups = list(1981:2000, 2040:2059, 2080:2099) + } + + + for(ygroup in yeargroups){ + start = Sys.time() + out = lapply(site_id, future_hab_wtr, + modern_era=ygroup, + secchi_function=secchi_standard, + driver_function=function(site_id){driver_fun(site_id, driver_name)}) + + wrapup_output(out, file.path(out_dir, site_id), years=ygroup) + + print(difftime(Sys.time(), start, units='hours')) + cat('on to the next\n') + } + + +} \ No newline at end of file diff --git a/R/slurm_array_cleanup_missing_lakes.R b/R/slurm_array_cleanup_missing_lakes.R new file mode 100644 index 0000000..cbb2a8a --- /dev/null +++ b/R/slurm_array_cleanup_missing_lakes.R @@ -0,0 +1,31 @@ +### Cleanup missing run lakes + +library(lakeattributes) +library(mda.lakes) + +################################################################################ +### read in config file config +config = read.table('config', header=TRUE, as.is=TRUE) + +driver_name = config$drivername +driver_url = config$driverurl +out_dir = file.path(config$outdir, driver_name) +set_driver_url(driver_url) + +rundirs = Sys.glob(file.path(out_dir, '*')) + +for(i in 1:length(rundirs)){ + #NLDAS will have at least 5 outfiles + if(length(dir(rundirs[i])) > 4){ + next + } + + site_id = basename(rundirs[i]) + cat('Running ', site_id, '...\n') + + run_necsc_lake(site_id, driver_name, out_dir) +} + + + + diff --git a/demo/slurm_array_hab_out_god_mode.R b/demo/slurm_array_hab_out_god_mode.R index 297742a..02edd77 100644 --- a/demo/slurm_array_hab_out_god_mode.R +++ b/demo/slurm_array_hab_out_god_mode.R @@ -2,210 +2,11 @@ ## This uses SLURM array running to run individual lakes ## MPI has been too troublesome -#first load some modules: -# module load tools/netcdf-4.3.2-gnu - - library(lakeattributes) library(mda.lakes) -library(dplyr) -library(glmtools) -source(system.file('demo/common_running_functions.R', package='mda.lakes')) - -Sys.setenv(TZ='GMT') - -# clusterEvalQ(c1, Sys.setenv(TZ='GMT')) - - -future_hab_wtr = function(site_id, modern_era=1979:2012, driver_function=get_driver_path, secchi_function=function(site_id){}, nml_args=list()){ - - library(lakeattributes) - library(mda.lakes) - library(dplyr) - library(glmtools) - library(lubridate) 
- - fastdir = tempdir() - #for use on WiWSC Condor pool - if(file.exists('/mnt/ramdisk')){ - fastdir = '/mnt/ramdisk' - } - #for use on YETI - if(Sys.getenv('RAM_SCRATCH', unset = '') != ''){ - fastdir = Sys.getenv('RAM_SCRATCH', unset = '') - } - - - tryCatch({ - - run_dir = file.path(fastdir, paste0(site_id, '_', sample.int(1e9, size=1))) - cat('START:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') - dir.create(run_dir) - - #rename for dplyr - nhd_id = site_id - - #prep observations for calibration data - data(wtemp) - obs = filter(wtemp, site_id == nhd_id) %>% - transmute(DateTime=date, Depth=depth, temp=wtemp) %>% - filter(year(DateTime) %in% modern_era) - - have_cal = nrow(obs) > 0 - - if(have_cal){ - #having a weird issue with resample_to_field, make unique - obs = obs[!duplicated(obs[,1:2]), ] - - write.table(obs, file.path(run_dir, 'obs.tsv'), sep='\t', row.names=FALSE) - } - - - #get driver data - driver_path = driver_function(site_id) - driver_path = gsub('\\\\', '/', driver_path) - - - kd_avg = secchi_function(site_id) #secchi_conv/mean(kds$secchi_avg, na.rm=TRUE) - - #run with different driver and ice sources - - prep_run_glm_kd(site_id=site_id, - path=run_dir, - years=modern_era, - kd=kd_avg, - nml_args=c(list( - dt=3600, subdaily=FALSE, nsave=24, - timezone=-6, - csv_point_nlevs=0, - snow_albedo_factor=0.85, - meteo_fl=driver_path, - cd=getCD(site_id, method='Hondzo')), - nml_args)) - - - ##parse the habitat and WTR info. next run will clobber output.nc - wtr_all = get_temp(file.path(run_dir, 'output.nc'), reference='surface') - ## drop the first n burn-in years - #years = as.POSIXlt(wtr$DateTime)$year + 1900 - #to_keep = !(years <= min(years) + nburn - 1) - #wtr_all = wtr[to_keep, ] - - core_metrics = necsc_thermal_metrics_core(run_dir, site_id) - - hansen_habitat = hansen_habitat_calc(run_dir, site_id) - - notaro_metrics = summarize_notaro(paste0(run_dir, '/output.nc')) - - nml = read_nml(file.path(run_dir, "glm2.nml")) - - if(have_cal){ - cal_data = resample_to_field(file.path(run_dir, 'output.nc'), file.path(run_dir,'obs.tsv')) - cal_data$site_id = site_id - cat('Calibration data calculated\n') - }else{ - cal_data = data.frame() #just use empy data frame if no cal data - cat('No Cal, calibration skipped\n') - } - - unlink(run_dir, recursive=TRUE) - - notaro_metrics$site_id = site_id - - all_data = list(wtr=wtr_all, core_metrics=core_metrics, - hansen_habitat=hansen_habitat, - site_id=site_id, - notaro_metrics=notaro_metrics, - nml=nml, - cal_data=cal_data) - - cat('END:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') - - return(all_data) - - }, error=function(e){ - unlink(run_dir, recursive=TRUE) - cat('FAIL:', format(Sys.time(), '%m-%d %H:%M:%S'), Sys.info()[['nodename']], site_id, '\n') - return(list(error=e, site_id)) - }) -} - - - -getnext = function(fname){ - i=0 - barefname = fname - while(file.exists(fname)){ - i=i+1 - fname = paste0(barefname, '.', i) - } - return(fname) -} - -wrapup_output = function(out, out_dir, years){ - - run_exists = file.exists(out_dir) - - if(!run_exists) {dir.create(out_dir, recursive=TRUE)} - - good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] - bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] - - sprintf('%i lakes ran\n', length(good_data)) - dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) - #drop the burn-in years - dframes = lapply(dframes, 
function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) - - hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) - hansen_habitat = subset(hansen_habitat, year %in% years) - - core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) - core_metrics = subset(core_metrics, year %in% years) - - notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) - - cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) - - model_config = lapply(good_data, function(x){x$nml}) - - notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) - write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) - write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - - - save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) - save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) - save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) - - rm(out, good_data, dframes) - gc() -} - ################################################################################ -## Lets run Downscaled climate runs 1980-1999, 2020-2039, 2080:2099 -################################################################################ -gcm_driver_fun = function(site_id, dname){ - drivers = read.csv(get_driver_path(paste0(site_id, ''), driver_name = dname, timestep = 'daily'), header=TRUE) - #nldas = read.csv(get_driver_path(paste0(site_id, ''), driver_name = 'NLDAS'), header=TRUE) - #drivers = driver_nldas_debias_airt_sw(drivers, nldas) - drivers = driver_add_burnin_years(drivers, nyears=2) - drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off - driver_save(drivers) -} - -nldas_driver_fun = function(site_id, dname){ - nldas = read.csv(get_driver_path(site_id, driver_name = dname), header=TRUE) - drivers = driver_nldas_wind_debias(nldas) - drivers = driver_add_burnin_years(drivers, nyears=2) - drivers = driver_add_rain(drivers, month=7:9, rain_add=0.5) ##keep the lakes topped off - #fix the 2-day offset in NLDAS data - drivers$time = drivers$time + as.difftime(-2, units='days') - driver_save(drivers) -} - +### read in env var config #This should be 1 to n, where n may be larger than total number of lakes to model task_id = as.numeric(Sys.getenv('SLURM_ARRAY_TASK_ID', 'NA')) @@ -219,6 +20,8 @@ if(is.na(task_offset)){ } task_id = task_id + task_offset +################################################################################ +### read in config file config config = read.table('config', header=TRUE, as.is=TRUE) driver_name = config$drivername @@ -226,14 +29,6 @@ driver_url = config$driverurl out_dir = file.path(config$outdir, driver_name) set_driver_url(driver_url) -if(driver_name == 'NLDAS'){ - driver_fun = nldas_driver_fun - yeargroups = list(1979:2015) -}else{ - driver_fun = gcm_driver_fun - yeargroups = list(1981:2000, 2040:2059, 2080:2099) -} - to_run = as.character(unique(zmax$site_id)) if(task_id > length(to_run)){ sprintf('Skipping task_id:%i because 
greater than number of lakes to run') @@ -243,17 +38,5 @@ if(task_id > length(to_run)){ site_id = to_run[task_id] -for(ygroup in yeargroups){ - start = Sys.time() - out = lapply(site_id, future_hab_wtr, - modern_era=ygroup, - secchi_function=secchi_standard, - driver_function=function(site_id){driver_fun(site_id, driver_name)}) - - wrapup_output(out, file.path(out_dir, site_id), years=ygroup) - - print(difftime(Sys.time(), start, units='hours')) - cat('on to the next\n') -} - +run_necsc_lake(site_id, driver_name, out_dir) From 31566fa066e87b3c5d16c89d363263fb325ea8d6 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 11:24:02 -0500 Subject: [PATCH 41/42] build for documentation and namespace and move file --- NAMESPACE | 1 + R/run_necsc_lake.R | 1 - demo/mpi_output_data.R | 2 +- {R => demo}/slurm_array_cleanup_missing_lakes.R | 0 man/run_necsc_lake.Rd | 13 +++++++++++++ 5 files changed, 15 insertions(+), 2 deletions(-) rename {R => demo}/slurm_array_cleanup_missing_lakes.R (100%) create mode 100644 man/run_necsc_lake.Rd diff --git a/NAMESPACE b/NAMESPACE index b7f3777..ab8fca8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -42,6 +42,7 @@ export(necsc_thermal_metrics_core) export(opti_thermal_habitat) export(populate_base_lake_nml) export(prep_run_glm_kd) +export(run_necsc_lake) export(sb_archive_model) export(sens_seasonal_site) export(set_driver_url) diff --git a/R/run_necsc_lake.R b/R/run_necsc_lake.R index 810cbc2..a0a719e 100644 --- a/R/run_necsc_lake.R +++ b/R/run_necsc_lake.R @@ -236,5 +236,4 @@ run_necsc_lake = function(site_id = NA, driver_name, out_dir){ cat('on to the next\n') } - } \ No newline at end of file diff --git a/demo/mpi_output_data.R b/demo/mpi_output_data.R index 6dbb6b3..737e1a7 100644 --- a/demo/mpi_output_data.R +++ b/demo/mpi_output_data.R @@ -1,5 +1,5 @@ -library(mda.lakes) +library(mda.lakes) config = read.table('config', header=TRUE, as.is=TRUE) diff --git a/R/slurm_array_cleanup_missing_lakes.R b/demo/slurm_array_cleanup_missing_lakes.R similarity index 100% rename from R/slurm_array_cleanup_missing_lakes.R rename to demo/slurm_array_cleanup_missing_lakes.R diff --git a/man/run_necsc_lake.Rd b/man/run_necsc_lake.Rd new file mode 100644 index 0000000..c86d954 --- /dev/null +++ b/man/run_necsc_lake.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/run_necsc_lake.R +\name{run_necsc_lake} +\alias{run_necsc_lake} +\title{Large wrapper function NECSC mod run} +\usage{ +run_necsc_lake(site_id = NA, driver_name, out_dir) +} +\description{ +Runs a single NECSC lake given the default configuration for +both NLDAS and Notaro drivers +} + From e22a35dd13a5226f0bf3f9f40eca01bd533ad240 Mon Sep 17 00:00:00 2001 From: Luke Winslow Date: Wed, 7 Sep 2016 15:11:00 -0500 Subject: [PATCH 42/42] better error handling on results output --- R/run_necsc_lake.R | 57 +++++++++++++++++++++++----------------------- 1 file changed, 29 insertions(+), 28 deletions(-) diff --git a/R/run_necsc_lake.R b/R/run_necsc_lake.R index a0a719e..2e2c20d 100644 --- a/R/run_necsc_lake.R +++ b/R/run_necsc_lake.R @@ -156,37 +156,38 @@ run_necsc_lake = function(site_id = NA, driver_name, out_dir){ good_data = out[!unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] bad_data = out[unlist(lapply(out, function(x){'error' %in% names(x) || is.null(x)}))] - - sprintf('%i lakes ran\n', length(good_data)) - dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) - #drop the burn-in years - dframes 
= lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) - - hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) - hansen_habitat = subset(hansen_habitat, year %in% years) - - core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) - core_metrics = subset(core_metrics, year %in% years) - - notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) - - cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) - - model_config = lapply(good_data, function(x){x$nml}) - - notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) - write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) - write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) - write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) - save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) - save('bad_data', file = getnext(file.path(out_dir, 'bad_data.Rdata'))) - save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + cat(sprintf('%i lakes ran\n', length(good_data))) + if(length(good_data) > 0){ + dframes = lapply(good_data, function(x){tmp = x[[1]]; tmp$site_id=x[['site_id']]; return(tmp)}) + #drop the burn-in years + dframes = lapply(dframes, function(df){subset(df, DateTime > as.POSIXct('1979-01-01'))}) + + hansen_habitat = do.call(rbind, lapply(good_data, function(x){x[['hansen_habitat']]})) + hansen_habitat = subset(hansen_habitat, year %in% years) + + core_metrics = do.call(rbind, lapply(good_data, function(x){x[['core_metrics']]})) + core_metrics = subset(core_metrics, year %in% years) + + notaro_metrics = do.call(rbind, lapply(good_data, function(x){x[['notaro_metrics']]})) + + cal_data = do.call(rbind, lapply(good_data, function(x){x[['cal_data']]})) + + model_config = lapply(good_data, function(x){x$nml}) + + notaro_file = file.path(out_dir, paste0('notaro_metrics_', paste0(range(years), collapse='_'), '.tsv')) + write.table(notaro_metrics, notaro_file, sep='\t', row.names=FALSE, append=file.exists(notaro_file), col.names=!file.exists(notaro_file)) + write.table(hansen_habitat, file.path(out_dir, 'best_hansen_hab.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + write.table(cal_data, file.path(out_dir, 'best_cal_data.tsv'), sep='\t', row.names=FALSE, append=run_exists, col.names=!run_exists) + + + save('dframes', file = getnext(file.path(out_dir, 'best_all_wtr.Rdata'))) + save('model_config', file=getnext(file.path(out_dir, 'model_config.Rdata'))) + } - rm(out, good_data, dframes) - gc() }
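# Across these patches, wrapup_output() relies on one recurring idiom: append
# result rows to a growing TSV across repeated calls, writing the header only
# when the file is first created. A self-contained sketch (append_tsv() is an
# illustrative helper, not exported by mda.lakes):
append_tsv = function(df, path){
  new_file = !file.exists(path)
  write.table(df, path, sep = '\t', row.names = FALSE,
              append = !new_file, col.names = new_file)
}
# usage, e.g.: append_tsv(core_metrics, file.path(out_dir, 'best_core_metrics.tsv'))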