From fdf930a6162e1e0f4adafdccfa6434b6128042e0 Mon Sep 17 00:00:00 2001 From: rsquaredin Date: Thu, 14 May 2020 17:01:21 +0530 Subject: [PATCH] fix addins --- DESCRIPTION | 1 - R/rbin-addins.R | 16 +++--- R/rbin-manual.R | 4 +- R/rbin-shiny.R | 126 +++++++++++++++++++++++++++++++++++++++++++++++ man/rbinAddin.Rd | 2 +- 5 files changed, 136 insertions(+), 13 deletions(-) create mode 100644 R/rbin-shiny.R diff --git a/DESCRIPTION b/DESCRIPTION index a87edb3..d4c7530 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,7 +22,6 @@ Suggests: graphics, knitr, miniUI, - rlang, rmarkdown, rstudioapi, shiny, diff --git a/R/rbin-addins.R b/R/rbin-addins.R index deab457..dfa42f6 100644 --- a/R/rbin-addins.R +++ b/R/rbin-addins.R @@ -8,7 +8,7 @@ #' \dontrun{ #' rbinAddin(data = mbank) #' } -#' +#' #' @export #' rbinAddin <- function(data = NULL) { @@ -171,7 +171,7 @@ rbinAddin <- function(data = NULL) { }) compute_bins <- shiny::eventReactive(input$create_bins, { - rbin_manual(data1(), input$resp_var, input$pred_var, bins_values()) + shiny_rbin_manual(data1(), input$resp_var, input$pred_var, bins_values()) }) down_bins <- shiny::reactive({ @@ -364,11 +364,12 @@ rbinFactorAddin <- function(data = NULL) { }) new_comb <- shiny::eventReactive(input$create_bins, { - rbin_factor_combine(data1(), !! rlang::sym(as.character(input$pred_var)), as.character(selected_levs()), as.character(input$new_lev)) + shiny_rbin_factor_combine(data1(), as.character(input$pred_var), + as.character(selected_levs()), as.character(input$new_lev)) }) woe_man <- shiny::eventReactive(input$create_bins, { - rbin_factor(new_comb(), !! rlang::sym(as.character(input$resp_var)), !! rlang::sym(as.character(input$pred_var))) + shiny_rbin_factor(new_comb(), as.character(input$resp_var), as.character(input$pred_var)) }) down_bins <- shiny::reactive({ @@ -376,7 +377,8 @@ rbinFactorAddin <- function(data = NULL) { }) woe_plot <- shiny::eventReactive(input$create_bins, { - graphics::plot(rbin_factor(new_comb(), !! rlang::sym(as.character(input$resp_var)), !! rlang::sym(as.character(input$pred_var)))) + graphics::plot(shiny_rbin_factor(new_comb(), as.character(input$resp_var), + as.character(input$pred_var))) }) output$woe_manual <- shiny::renderPrint({ @@ -387,10 +389,6 @@ rbinFactorAddin <- function(data = NULL) { woe_plot() }) - create_woe <- shiny::reactive({ - rbin_factor_create(new_comb(), !! rlang::sym(as.character(input$pred_var))) - }) - shiny::observeEvent(input$done, { shiny::stopApp() }) diff --git a/R/rbin-manual.R b/R/rbin-manual.R index c689fb4..c421a9f 100644 --- a/R/rbin-manual.R +++ b/R/rbin-manual.R @@ -37,8 +37,8 @@ rbin_manual.default <- function(data = NULL, response = NULL, predictor = NULL, resp <- deparse(substitute(response)) pred <- deparse(substitute(predictor)) - var_names <- names(data[, c(resp, pred)]) - prep_data <- data[, c(resp, pred)] + var_names <- names(data[c(resp, pred)]) + prep_data <- data[c(resp, pred)] if (include_na) { bm <- prep_data diff --git a/R/rbin-shiny.R b/R/rbin-shiny.R new file mode 100644 index 0000000..cc9d42c --- /dev/null +++ b/R/rbin-shiny.R @@ -0,0 +1,126 @@ +shiny_rbin_manual <- function(data = NULL, response = NULL, predictor = NULL, + cut_points = NULL, include_na = TRUE) { + + resp <- response + pred <- predictor + + var_names <- names(data[c(resp, pred)]) + prep_data <- data[c(resp, pred)] + + if (include_na) { + bm <- prep_data + } else { + bm <- na.omit(prep_data) + } + + colnames(bm) <- c("response", "predictor") + + bm$bin <- NA + byd <- bm$predictor + l_freq <- append(min(byd, na.rm = TRUE), cut_points) + u_freq <- c(cut_points, (max(byd, na.rm = TRUE) + 1)) + bins <- length(cut_points) + 1 + + for (i in seq_len(bins)) { + bm$bin[bm$predictor >= l_freq[i] & bm$predictor < u_freq[i]] <- i + } + + k <- bin_create(bm) + sym_sign <- c(rep("<", (bins - 1)), ">=") + fbin2 <- f_bin(u_freq) + intervals <- create_intervals(sym_sign, fbin2) + + if (include_na) { + + na_present <- nrow(k) > bins + + if (na_present) { + intervals <- rbind(intervals, cut_point = 'NA') + } + + } + + result <- list(bins = cbind(intervals, k), + method = "Manual", + vars = var_names, + lower_cut = l_freq, + upper_cut = u_freq) + + class(result) <- c("rbin_manual") + return(result) + +} + + +shiny_rbin_factor_combine <- function(data, var, new_var, new_name) { + + vars <- var + mydata <- data[[vars]] + current_lev <- levels(mydata) + l <- length(new_var) + + for (i in seq_len(l)) { + current_lev <- gsub(new_var[i], new_name, current_lev) + } + + levels(mydata) <- current_lev + data[vars] <- NULL + out <- cbind(data, mydata) + nl <- ncol(out) + names(out)[nl] <- vars + + return(out) + +} + +shiny_rbin_factor <- function(data = NULL, response = NULL, predictor = NULL, include_na = TRUE) { + + resp <- response + pred <- predictor + + var_names <- names(data[, c(resp, pred)]) + prep_data <- data[, c(resp, pred)] + + if (include_na) { + bm <- prep_data + } else { + bm <- na.omit(prep_data) + } + + colnames(bm) <- c("response", "predictor") + + bm <- data.table(bm) + + # group and summarize + bm_group <- bm[, .(bin_count = .N, + good = sum(response == 1), + bad = sum(response == 0)), + by = predictor] + + # create new columns + bm_group[, ':='(bin_cum_count = cumsum(bin_count), + good_cum_count = cumsum(good), + bad_cum_count = cumsum(bad), + bin_prop = bin_count / sum(bin_count), + good_rate = good / bin_count, + bad_rate = bad / bin_count, + good_dist = good / sum(good), + bad_dist = bad / sum(bad))] + + bm_group[, woe := log(bad_dist / good_dist)] + bm_group[, dist_diff := bad_dist - good_dist,] + bm_group[, iv := dist_diff * woe,] + bm_group[, entropy := (-1) * (((good / bin_count) * log2(good / bin_count)) + + ((bad / bin_count) * log2(bad / bin_count)))] + bm_group[, prop_entropy := (bin_count / sum(bin_count)) * entropy] + + setDF(bm_group) + colnames(bm_group)[1] <- 'level' + + result <- list(bins = bm_group, method = "Custom", vars = var_names) + + class(result) <- c("rbin_factor") + return(result) + +} + diff --git a/man/rbinAddin.Rd b/man/rbinAddin.Rd index e02990b..6f30656 100644 --- a/man/rbinAddin.Rd +++ b/man/rbinAddin.Rd @@ -16,5 +16,5 @@ Manually bin continuous data using weight of evidence. \dontrun{ rbinAddin(data = mbank) } - + }