diff --git a/DESCRIPTION b/DESCRIPTION index f9aa3b8..4c89825 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: betaselectr Title: Selective Standardization in Structural Equation Models -Version: 0.0.1.14 +Version: 0.0.1.15 Authors@R: c(person(given = "Shu Fai", family = "Cheung", diff --git a/NEWS.md b/NEWS.md index af4a08d..50623c8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# betaselectr 0.0.1.14 +# betaselectr 0.0.1.15 - Added `lm_betaselect()` and related methods and helper functions. @@ -77,4 +77,8 @@ to print confidence intervals by default, if available. (0.0.1.13) +- Added `skip_response` to + `lm_betaselect()` and `glm_betaselect()` + as a convenient way to skip standardizing + the response variables. (0.0.1.15) diff --git a/R/lm_betaselect.R b/R/lm_betaselect.R index d1deb4e..bad876a 100644 --- a/R/lm_betaselect.R +++ b/R/lm_betaselect.R @@ -239,6 +239,17 @@ #' at the same time. Default is `NULL`, #' and only `to_standardize` is used. #' +#' @param skip_response Logical. If +#' `TRUE`, will not standardize the +#' response (outcome) variable even if +#' it appears in `to_standardize` or +#' `to_standardize` is not specified. +#' Used for models such as logistic +#' regression models in which there are +#' some restrictions on the response +#' variables (e.g., only 0 or 1 for +#' logistic regression). +#' #' @param do_boot Whether bootstrapping #' will be conducted. Default is `TRUE`. 
#' @@ -352,6 +363,7 @@ lm_betaselect <- function(..., to_standardize = NULL, not_to_standardize = NULL, + skip_response = FALSE, do_boot = TRUE, bootstrap = 100L, iseed = NULL, @@ -408,7 +420,10 @@ lm_betaselect <- function(..., input_data = input_data, to_standardize = to_standardize, not_to_standardize = not_to_standardize, - skip_categorical_x = TRUE + skip_categorical_x = TRUE, + skip_response = skip_response, + model_call = model_call, + org_call = my_call ) # Do standardization input_data_z <- input_data @@ -510,6 +525,7 @@ lm_betaselect <- function(..., glm_betaselect <- function(..., to_standardize = NULL, not_to_standardize = NULL, + skip_response = FALSE, do_boot = TRUE, bootstrap = 100L, iseed = NULL, diff --git a/R/lm_betaselect_helpers.R b/R/lm_betaselect_helpers.R index 982826a..d6a80d6 100644 --- a/R/lm_betaselect_helpers.R +++ b/R/lm_betaselect_helpers.R @@ -58,7 +58,10 @@ fix_to_standardize_lm_data <- function(object, input_data, to_standardize = ".all.", not_to_standardize = NULL, - skip_categorical_x = TRUE) { + skip_categorical_x = TRUE, + skip_response = FALSE, + model_call = NULL, + org_call = NULL) { if (!identical(to_standardize, ".all.") && !is.null(not_to_standardize)) { stop("Do not specify both to_standardize and not_to_standardize.") @@ -96,6 +99,12 @@ fix_to_standardize_lm_data <- function(object, if ((length(cat_vars) > 0) && skip_categorical_x) { to_standardize <- setdiff(to_standardize, cat_vars) } + # Drop the response variable, identified from the model terms, from to_standardize + if (skip_response) { + lm_terms <- stats::terms(object) + lm_y <- all.vars(lm_terms)[attr(lm_terms, "response")] + to_standardize <- setdiff(to_standardize, lm_y) + } # if (length(prods) > 0) { # to_standardize <- setdiff(to_standardize, names(prods)) # } diff --git a/README.md b/README.md index 20d4e45..0702c29 100644 --- a/README.md +++ b/README.md @@ -12,7 
+12,7 @@ Not ready for use. # betaselectr: Do selective standardization in structural equation models and regression models -(Version 0.0.1.14, updated on 2024-10-30, [release history](https://sfcheung.github.io/betaselectr/news/index.html)) +(Version 0.0.1.15, updated on 2024-10-30, [release history](https://sfcheung.github.io/betaselectr/news/index.html)) It computes Beta_Select, standardization in structural equation models with only diff --git a/man/lm_betaselect.Rd b/man/lm_betaselect.Rd index 6e22f90..2b46b56 100644 --- a/man/lm_betaselect.Rd +++ b/man/lm_betaselect.Rd @@ -13,6 +13,7 @@ lm_betaselect( ..., to_standardize = NULL, not_to_standardize = NULL, + skip_response = FALSE, do_boot = TRUE, bootstrap = 100L, iseed = NULL, @@ -27,6 +28,7 @@ glm_betaselect( ..., to_standardize = NULL, not_to_standardize = NULL, + skip_response = FALSE, do_boot = TRUE, bootstrap = 100L, iseed = NULL, @@ -81,6 +83,17 @@ cannot be used with \code{to_standardize} at the same time. Default is \code{NULL}, and only \code{to_standardize} is used.} +\item{skip_response}{Logical. If +\code{TRUE}, will not standardize the +response (outcome) variable even if +it appears in \code{to_standardize} or +\code{to_standardize} is not specified. +Used for models such as logistic +regression models in which there are +some restrictions on the response +variables (e.g., only 0 or 1 for +logistic regression).} + \item{do_boot}{Whether bootstrapping will be conducted. 
Default is \code{TRUE}.} diff --git a/tests/testthat/test_glm_betaselect_skip_def.R b/tests/testthat/test_glm_betaselect_skip_def.R new file mode 100644 index 0000000..9368f4c --- /dev/null +++ b/tests/testthat/test_glm_betaselect_skip_def.R @@ -0,0 +1,14 @@ +library(testthat) + +dat <- data_test_mod_cat + +dat$dv <- ifelse(dat$dv > mean(dat$dv), + yes = 1, + no = 0) +test_that("skip response", { + expect_error(glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, to_standardize = "dv", do_boot = FALSE, family = binomial), + "y values", fixed = TRUE) + expect_error(glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, do_boot = FALSE, family = binomial), + "y values", fixed = TRUE) + expect_no_error(glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, skip_response = TRUE, do_boot = FALSE, family = binomial)) +}) diff --git a/vignettes/betaselectr_glm.Rmd b/vignettes/betaselectr_glm.Rmd index 52f0620..a745f3a 100644 --- a/vignettes/betaselectr_glm.Rmd +++ b/vignettes/betaselectr_glm.Rmd @@ -233,7 +233,8 @@ including product terms. Suppose we only need to solve the first problem, standardizing all -numeric variables, +numeric variables except for the +response variable (which is binary), with the product term computed after `iv` and `mod` are standardized. @@ -242,7 +243,7 @@ are standardized. ``` r glm_beta_select <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, - not_to_standardize = "dv", + skip_response = TRUE, family = binomial(), do_boot = FALSE) ``` @@ -262,9 +263,9 @@ variable (`dv` in this example) may not be meaningful or may not even be allowed. In the case of logistic regression, the outcome variable needs to be 0 or 1 only. -Therefore, `not_to_standardize` is set to -`"dv"`, the name of the outcome variable, -to request that it is *not* standardized. +Therefore, `skip_response` is set to +`TRUE`, to request that the response +(outcome) variable is *not* standardized. 
Moreover, categorical variables (factors and string variables) will not be standardized. @@ -286,7 +287,7 @@ summary(glm_beta_select) #> Waiting for profiling to be done... #> Call to glm_betaselect(): #> betaselectr::lm_betaselect(formula = dv ~ iv * mod + cov1 + cat1, -#> family = binomial(), data = data_test_mod_cat_binary, not_to_standardize = "dv", +#> family = binomial(), data = data_test_mod_cat_binary, skip_response = TRUE, #> do_boot = FALSE, model_call = "glm") #> #> Variable(s) standardized: iv, mod, cov1 @@ -367,7 +368,7 @@ set: glm_beta_select_boot <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, family = binomial(), - not_to_standardize = "dv", + skip_response = TRUE, bootstrap = 5000, iseed = 4567) ``` @@ -392,7 +393,7 @@ This is the output of `summary()` summary(glm_beta_select_boot) #> Call to glm_betaselect(): #> betaselectr::lm_betaselect(formula = dv ~ iv * mod + cov1 + cat1, -#> family = binomial(), data = data_test_mod_cat_binary, not_to_standardize = "dv", +#> family = binomial(), data = data_test_mod_cat_binary, skip_response = TRUE, #> bootstrap = 5000, iseed = 4567, model_call = "glm") #> #> Variable(s) standardized: iv, mod, cov1 @@ -470,6 +471,7 @@ this, setting glm_beta_select_boot_1 <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, to_standardize = c("iv", "cov1"), + skip_response = TRUE, family = binomial(), bootstrap = 5000, iseed = 4567) @@ -485,6 +487,7 @@ this call, and set glm_beta_select_boot_2 <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, not_to_standardize = c("dv", "mod"), + skip_response = TRUE, family = binomial(), bootstrap = 5000, iseed = 4567) @@ -500,7 +503,8 @@ summary(glm_beta_select_boot_1) #> Call to glm_betaselect(): #> betaselectr::lm_betaselect(formula = dv ~ iv * mod + cov1 + cat1, #> family = binomial(), data = data_test_mod_cat_binary, to_standardize = c("iv", -#> "cov1"), bootstrap = 5000, iseed = 4567, model_call = 
"glm") +#> "cov1"), skip_response = TRUE, bootstrap = 5000, iseed = 4567, +#> model_call = "glm") #> #> Variable(s) standardized: iv, cov1 #> diff --git a/vignettes/betaselectr_glm.Rmd.original b/vignettes/betaselectr_glm.Rmd.original index 26fe74f..95882e7 100644 --- a/vignettes/betaselectr_glm.Rmd.original +++ b/vignettes/betaselectr_glm.Rmd.original @@ -181,7 +181,8 @@ including product terms. Suppose we only need to solve the first problem, standardizing all -numeric variables, +numeric variables except for the +response variable (which is binary), with the product term computed after `iv` and `mod` are standardized. @@ -189,7 +190,7 @@ are standardized. ```{r, results = FALSE} glm_beta_select <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, - not_to_standardize = "dv", + skip_response = TRUE, family = binomial(), do_boot = FALSE) ``` @@ -209,9 +210,9 @@ variable (`dv` in this example) may not be meaningful or may not even be allowed. In the case of logistic regression, the outcome variable needs to be 0 or 1 only. -Therefore, `not_to_standardize` is set to -`"dv"`, the name of the outcome variable, -to request that it is *not* standardized. +Therefore, `skip_response` is set to +`TRUE`, to request that the response +(outcome) variable is *not* standardized. Moreover, categorical variables (factors and string variables) will not be standardized. 
@@ -269,7 +270,7 @@ set: glm_beta_select_boot <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, family = binomial(), - not_to_standardize = "dv", + skip_response = TRUE, bootstrap = 5000, iseed = 4567) ``` @@ -330,6 +331,7 @@ this, setting glm_beta_select_boot_1 <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, to_standardize = c("iv", "cov1"), + skip_response = TRUE, family = binomial(), bootstrap = 5000, iseed = 4567) @@ -344,6 +346,7 @@ this call, and set glm_beta_select_boot_2 <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data = data_test_mod_cat_binary, not_to_standardize = c("dv", "mod"), + skip_response = TRUE, family = binomial(), bootstrap = 5000, iseed = 4567)