diff --git a/DESCRIPTION b/DESCRIPTION index 135fcd7..b8472f1 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: betaselectr Title: Selective Standardization in Structural Equation Models -Version: 0.0.1.3 +Version: 0.0.1.4 Authors@R: c(person(given = "Shu Fai", family = "Cheung", diff --git a/NAMESPACE b/NAMESPACE index 85ee091..868cae7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,15 +1,25 @@ # Generated by roxygen2: do not edit by hand +S3method(anova,glm_betaselect) S3method(anova,lm_betaselect) +S3method(coef,glm_betaselect) S3method(coef,lm_betaselect) +S3method(confint,glm_betaselect) S3method(confint,lm_betaselect) +S3method(getCall,glm_betaselect) S3method(getCall,lm_betaselect) +S3method(predict,glm_betaselect) S3method(predict,lm_betaselect) +S3method(print,glm_betaselect) S3method(print,lav_betaselect) S3method(print,lm_betaselect) +S3method(print,summary.glm_betaselect) S3method(print,summary.lm_betaselect) +S3method(summary,glm_betaselect) S3method(summary,lm_betaselect) +S3method(vcov,glm_betaselect) S3method(vcov,lm_betaselect) +export(glm_betaselect) export(lav_betaselect) export(lm_betaselect) export(raw_output) diff --git a/NEWS.md b/NEWS.md index ac71b6b..73f907e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# betaselectr 0.0.1.3 +# betaselectr 0.0.1.4 - Added `lm_betaselect()` and related methods and helper functions. @@ -13,4 +13,9 @@ terms. (0.0.1.2) - Added some examples for the main - functions. (0.0.1.3) \ No newline at end of file + functions. (0.0.1.3) + +- Added `glm_betaselect()` and + related methods to + support models fitted by + `stats::glm()`. (0.0.1.4) diff --git a/R/lm_betaselect.R b/R/lm_betaselect.R index 819c77b..6cfbf14 100644 --- a/R/lm_betaselect.R +++ b/R/lm_betaselect.R @@ -7,23 +7,26 @@ #' skip categorical predictors in #' standardization. #' -#' @details The function [lm_betaselect()] -#' lets users +#' @details The functions [lm_betaselect()] +#' and [glm_betaselect()] +#' let users #' select which variables to be #' standardized when computing the -#' standardized solution. It has the +#' standardized solution. They have the #' following features: #' -#' - It automatically skips categorical +#' - They automatically skip categorical #' predictors (i.e., factor or string #' variables). #' -#' - It does not standardize product -#' term, which is incorrect. Instead, it +#' - They do not standardize a product +#' term, which is incorrect. Instead, +#' they #' compute the product term with its -#' component variables standardized. +#' component variables standardized, +#' if requested. #' -#' - It standardizes the selected +#' - They standardize the selected #' variables *before* fitting a model. #' Therefore, If a model has the term #' `log(x)` and `x` is one of the @@ -33,20 +36,22 @@ #' standardized `log(x)` which is #' difficult to interpret. #' -#' - It can be used to generate +#' - They can be used to generate #' nonparametric #' bootstrap confidence intervals for #' the standardized solution. Bootstrap #' confidence interval is better than -#' the least square confidence interval +#' the default confidence interval #' ignoring the standardization #' because it #' takes into account the sampling #' variance of the standard deviations. -#' It is one of the recommended methods +#' Preliminary support for bootstrap +#' confidence has been found #' for forming confidence intervals for #' coefficients involving standardized -#' variables (Jones & Waller, 2013). +#' variables in linear regression +#' (Jones & Waller, 2013). #' #' ## Problems With Common Approaches #' @@ -73,13 +78,13 @@ #' be more difficult to interpret when #' they are standardized (e.g., age). #' -#' ## How It Works +#' ## How The Function Work #' -#' It standardizes the original variables +#' They standardize the original variables #' *before* they are used in the -#' regression model. Therefore, strictly -#' speaking, it does not standardize -#' the predictors in a regression model, +#' model. Therefore, strictly +#' speaking, they do not standardize +#' the predictors in model, #' but standardize the *input variable* #' (Gelman et al., 2021). #' @@ -97,12 +102,16 @@ #' ## Methods #' #' The output of [lm_betaselect()] is -#' an `lm_betaselect`-class object. -#' It has the following methods:add +#' an `lm_betaselect`-class object, +#' and the output of [glm_betaselect()] +#' is a `glm_betaselect`-class object. +#' They have the following methods: #' #' - A `coef`-method for extracting #' the coefficients of the model. -#' (See [coef.lm_betaselect()] for details.) +#' (See [coef.lm_betaselect()] +#' and [coef.glm_betaselect()] +#' for details.) #' #' - A `vcov`-method for extracting the #' variance-covariance matrix of the @@ -110,7 +119,9 @@ #' If bootstrapping is requested, it #' can return the matrix based on the #' bootstrapping estimates. -#' (See [vcov.lm_betaselect()] for details.) +#' (See [vcov.lm_betaselect()] +#' and [vcov.glm_betaselect()] +#' for details.) #' #' - A `confint`-method for forming the #' confidence intervals of the @@ -118,20 +129,27 @@ #' If bootstrapping is requested, it #' can return the bootstrap confidence #' intervals. -#' (See [confint.lm_betaselect()] for details.) +#' (See [confint.lm_betaselect()] and +#' [confint.glm_betaselect()] +#' for details.) #' #' - A `summary`-method for printing the #' summary of the results, with additional #' information such as the number of #' bootstrap samples and which variables #' have been standardized. -#' (See [summary.lm_betaselect()] for details.) +#' (See [summary.lm_betaselect()] and +#' [summary.glm_betaselect()] +#' for details.) #' #' - An `anova`-method for printing the #' ANOVA table. Can also be used to #' compare two or more outputs of -#' [lm_betaselect()]. -#' (See [anova.lm_betaselect()] for details.) +#' [lm_betaselect()] or +#' [glm_betaselect()] +#' (See [anova.glm_betaselect()] +#' and [anova.glm_betaselect()] +#' for details.) #' #' - A `predict`-method for computing #' predicted values. It can be used to @@ -140,51 +158,67 @@ #' The data will be standardized before #' computing the predicted values in #' the models with standardization. -#' (See [predict.lm_betaselect()] for details.) +#' (See [predict.lm_betaselect()] and +#' [predict.glm_betaselect()] +#' for details.) #' #' - The default `update`-method for updating #' a call also works for an -#' `lm_betaselect` object. It can +#' `lm_betaselect` object or +#' a `glm_betaselect()` object. It can #' update the model in the same #' way it updates a model fitted by -#' [stats::lm()], and also update -#' arguments of [lm_betaselect()], +#' [stats::lm()] or [stats::glm()], +#' and also update +#' the arguments of [lm_betaselect()] +#' or [glm_betaselect()] #' such as the variables to be #' standardized. #' (See [stats::update()] for details.) #' #' Most other methods for the output -#' of [stats::lm()] should also work -#' on an `lm_betaselect`-class object. +#' of [stats::lm()] and [stats::glm()] +#' should also work +#' on an `lm_betaselect`-class object +#' or a `glm_betaselect`-class object, +#' respectively. #' Some of them will give the same #' results regardless of the variables -#' standardized. For example, +#' standardized. Examples are #' [rstandard()] and [cooks.distance()]. #' For some others, they should be used #' with cautions if they make use of #' the variance-covariance matrix -#' of the estimates because they may use -#' the least square version. +#' of the estimates. #' #' To use the methods for `lm` objects +#' or `glm` objects #' on the results without standardization, #' simply use [raw_output()]. For example, #' to get the fitted values without #' standardization, call #' `fitted(raw_output(x))`, where `x` -#' is the output of [lm_betaselect()]. +#' is the output of [lm_betaselect()] +#' or [glm_betaselect()]. #' #' @return #' The function [lm_betaselect()] #' returns an object of the class `lm_betaselect`, -#' which is similar to the output -#' of [lm()], with additional information -#' stored. -#' -#' @param ... For [lm_betaselect()], +#' The function [glm_betaselect()] +#' returns an object of the class +#' `glm_betaselect`. They are similar +#' in structure to the output of +#' [stats::lm()] and [stats::glm()], +#' with additional information stored. +#' +#' @param ... For [lm_betaselect()]. #' these arguments will be #' passed directly to [lm()]. For +#' [glm_betaselect()], these arguments +#' will be passed to [glm()]. +#' For #' the `print`-method of `lm_betaselect` +#' or `glm_betaselect` #' objects, this will be passed to #' other methods. #' @@ -224,8 +258,8 @@ #' `TRUE` and this argument is `TRUE`, #' parallel processing will be used to #' do bootstrapping. Default is `FALSE` -#' because bootstrapping in regression -#' by [lm()] is rarely slow. +#' because bootstrapping for models fitted +#' by [lm()] or [glm()] is rarely slow. #' #' @param ncpus If `do_boot` is `TRUE` #' and `parallel` is also `TRUE`, this @@ -243,6 +277,15 @@ #' whether load balancing will be used. #' Default is `TRUE`. #' +#' @param model_call The model function +#' to be called. +#' If `"lm"`, the default, the model will be fitted +#' by [stats::lm()]. If `"glm"`, the +#' model will be fitted by [stats::glm()]. +#' Users should call the corresponding +#' function directly rather than setting +#' this argument manually. +#' #' @author Shu Fai Cheung #' #' @references @@ -264,9 +307,10 @@ #' intervals for standardized regression coefficients. #' *Psychological Methods, 18*(4), 435--453. #' \doi{10.1037/a0033269} - #' -#' @seealso [print.lav_betaselect()] for its print method. +#' @seealso [print.lm_betaselect()] and +#' [print.glm_betaselect()] for the +#' `print`-methods. #' #' @examples #' @@ -309,7 +353,8 @@ lm_betaselect <- function(..., parallel = FALSE, ncpus = parallel::detectCores(logical = FALSE) - 1, progress = TRUE, - load_balancing = TRUE) { + load_balancing = TRUE, + model_call = c("lm", "glm")) { # Workflow # - Do regression on the unstandardized input variables. @@ -322,11 +367,14 @@ lm_betaselect <- function(..., # - Other SEs and CIs can be computed on request. # Do regression on the unstandardized input variables. + model_call <- match.arg(model_call) my_call <- match.call() lm_args <- as.list(my_call)[-1] my_formals <- names(formals()) lm_args[my_formals] <- NULL - lm_call <- as.call(c(str2lang("stats::lm"), lm_args)) + lm_call <- switch(model_call, + lm = as.call(c(str2lang("stats::lm"), lm_args)), + glm = as.call(c(str2lang("stats::glm"), lm_args))) lm_ustd <- eval(lm_call, envir = parent.frame()) vcov_ustd <- stats::vcov(lm_ustd) @@ -399,17 +447,77 @@ lm_betaselect <- function(..., bootstrap = bootstrap, ncpus = ncpus, progress = progress, - load_balancing = load_balancing) + load_balancing = load_balancing, + model_call = model_call) } else { boot_out <- NULL } out$lm_betaselect$boot_out <- boot_out - class(out) <- c("lm_betaselect", class(out)) + class(out) <- switch(model_call, + lm = c("lm_betaselect", class(out)), + glm = c("glm_betaselect", class(out))) out } +#' @examples +#' +#' data(data_test_mod_cat) +#' +#' data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +#' data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, +#' yes = 1, +#' no = 0) +#' # Standardize only iv +#' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' family = binomial, +#' data = data_test_mod_cat, +#' to_standardize = "iv") +#' summary(logistic_beta_x) +#' +#' logistic_beta_x +#' summary(logistic_beta_x) +#' +#' # Manually standardize iv and call glm() +#' +#' data_test_mod_cat$iv_z <- scale(data_test_mod_cat[, "iv"])[, 1] +#' +#' logistic_beta_x_manual <- glm(p ~ iv_z*mod + cov1 + cat1, +#' family = binomial, +#' data = data_test_mod_cat) +#' +#' coef(logistic_beta_x) +#' coef(logistic_beta_x_manual) +#' +#' # Standardize all numeric predictors +#' +#' logistic_beta_allx <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' family = binomial, +#' data = data_test_mod_cat, +#' to_standardize = c("iv", "mod", "cov1")) +#' # Note that cat1 is not standardized +#' summary(logistic_beta_allx) +#' +#' @rdname lm_betaselect +#' @export + +glm_betaselect <- function(..., + to_standardize = NULL, + not_to_standardize = NULL, + do_boot = TRUE, + bootstrap = 100L, + iseed = NULL, + parallel = FALSE, + ncpus = parallel::detectCores(logical = FALSE) - 1, + progress = TRUE, + load_balancing = TRUE) { + my_call <- match.call() + my_call[[1]] <- str2lang("betaselectr::lm_betaselect") + my_call$model_call <- "glm" + eval(my_call, + envir = parent.frame()) + } #' @title Standardize Selected Variables #' #' @description Standardize selected @@ -463,7 +571,10 @@ lm_boot <- function(lm_args, bootstrap, ncpus, progress, - load_balancing) { + load_balancing, + model_call = c("lm", "glm")) { + + model_call <- match.arg(model_call) if (!is.null(iseed)) set.seed(iseed) @@ -475,24 +586,45 @@ lm_boot <- function(lm_args, lm_args_i <- lapply(lm_args, eval, envir = parent.frame()) - - tmpfct <- function(i) { - data_i <- input_data[i, ] - input_data_z <- betaselectr::std_data(data_i, - to_standardize = to_standardize) - lm_args_i$data <- data_i - ustd_i <- do.call(stats::lm, - lm_args_i) - lm_args_i$data <- input_data_z - std_i <- do.call(stats::lm, - lm_args_i) - list(coef_ustd = stats::coef(ustd_i), - vcov_ustd = stats::vcov(ustd_i), - sigma_mm_ustd = stats::cov(stats::model.matrix(ustd_i)), - coef_std = stats::coef(std_i), - vcov_std = stats::vcov(std_i), - sigma_mm_std = stats::cov(stats::model.matrix(std_i))) - } + if (model_call == "lm") { + # Code duplication is intended + tmpfct <- function(i) { + data_i <- input_data[i, ] + input_data_z <- betaselectr::std_data(data_i, + to_standardize = to_standardize) + lm_args_i$data <- data_i + ustd_i <- do.call(stats::lm, + lm_args_i) + lm_args_i$data <- input_data_z + std_i <- do.call(stats::lm, + lm_args_i) + list(coef_ustd = stats::coef(ustd_i), + vcov_ustd = stats::vcov(ustd_i), + sigma_mm_ustd = stats::cov(stats::model.matrix(ustd_i)), + coef_std = stats::coef(std_i), + vcov_std = stats::vcov(std_i), + sigma_mm_std = stats::cov(stats::model.matrix(std_i))) + } + } else { + # model_call -= "glm" + tmpfct <- function(i) { + data_i <- input_data[i, ] + input_data_z <- betaselectr::std_data(data_i, + to_standardize = to_standardize) + lm_args_i$data <- data_i + ustd_i <- do.call(stats::glm, + lm_args_i) + lm_args_i$data <- input_data_z + std_i <- do.call(stats::glm, + lm_args_i) + list(coef_ustd = stats::coef(ustd_i), + vcov_ustd = stats::vcov(ustd_i), + sigma_mm_ustd = stats::cov(stats::model.matrix(ustd_i)), + coef_std = stats::coef(std_i), + vcov_std = stats::vcov(std_i), + sigma_mm_std = stats::cov(stats::model.matrix(std_i))) + } + } if (parallel) { if (load_balancing && progress) { @@ -533,7 +665,8 @@ lm_boot <- function(lm_args, return(boot_out) } -#' @param x A `lm_betaselect`-class object. +#' @param x A `lm_betaselect`-class object +#' or `glm_betaselect`-class object. #' #' @param digits The number of significant #' digits to be printed for the @@ -585,18 +718,26 @@ print.lm_betaselect <- function(x, invisible(x) } -#' @param x A `lm_betaselect`-class object. +#' @rdname lm_betaselect +#' @export +print.glm_betaselect <- print.lm_betaselect + +#' @param x An `lm_betaselect` or +#' `glm_betaselect` object. #' #' @details #' The function [raw_output()] simply extracts #' the regression output by [stats::lm()] +#' or [stats::glm()] #' on the variables without standardization. #' #' @return #' The function [raw_output()] returns -#' an object of the class `lm`, which are +#' an object of the class `lm` or +#' `glm`, which are #' the results of fitting the model -#' to the data by [stats::lm()] without +#' to the data by [stats::lm()] +#' or [stats::glm()] without #' standardization. #' #' @examples diff --git a/R/lm_betaselect_methods.R b/R/lm_betaselect_methods.R index a0727d5..7715fe8 100644 --- a/R/lm_betaselect_methods.R +++ b/R/lm_betaselect_methods.R @@ -1,9 +1,10 @@ -#' @title Coefficients of an -#' 'lm_betaselect'-Class Object +#' @title Coefficients of +#' Beta-Select in Linear Models #' #' @description Return the estimates of #' coefficients in an -#' `lm_betaselect`-class object. +#' `lm_betaselect`-class or +#' `glm_betaselect`-class object. # #' @details By default, it extracts the #' regression coefficients *after* the @@ -18,8 +19,10 @@ #' regression coefficients. #' #' @param object The output of -#' [lm_betaselect()], or an -#' `lm_betaselect`-class object. +#' [lm_betaselect()] or +#' [glm_betaselect()], or an +#' `lm_betaselect`-class or +#' `glm_betaselect`-class object. #' #' @param complete If `TRUE`, it returns #' the full vector of coefficients, @@ -40,7 +43,8 @@ #' #' @author Shu Fai Cheung #' -#' @seealso [lm_betaselect()] +#' @seealso [lm_betaselect()] and +#' [glm_betaselect()] #' #' @examples #' @@ -69,13 +73,33 @@ coef.lm_betaselect <- function(object, } } -#' @title The 'vcov' Method for an -#' 'lm_betaselect'-Class Object +#' @examples +#' +#' data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +#' data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, +#' yes = 1, +#' no = 0) +#' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' data = data_test_mod_cat, +#' family = binomial, +#' to_standardize = "iv") +#' coef(logistic_beta_x) +#' coef(logistic_beta_x, type = "raw") +#' +#' @rdname coef.lm_betaselect +#' @export + +coef.glm_betaselect <- coef.lm_betaselect + +#' @title The 'vcov' Method for +#' 'lm_betaselect' and `glm_betaselect` +#' Objects #' #' @description Compute the #' variance-covariance matrix of #' estimates in the output of -#' [lm_betaselect()]. +#' [lm_betaselect()] or +#' [glm_betaselect()]. #' #' @details The type of #' variance-covariance matrix depends @@ -83,7 +107,7 @@ coef.lm_betaselect <- function(object, #' was requested, by default it returns #' the bootstrap variance-covariance #' matrix. Otherwise, it returns the -#' OLS (or WLS) variance-covariance +#' default variance-covariance #' matrix and raises a warning. #' #' Support for other type of @@ -92,6 +116,10 @@ coef.lm_betaselect <- function(object, #' #' @author Shu Fai Cheung #' +#' +#' @seealso [lm_betaselect()] and +#' [glm_betaselect()] +#' #' @return #' A matrix of the variances and #' covariances of the parameter @@ -99,18 +127,22 @@ coef.lm_betaselect <- function(object, #' #' @param object The output of #' [lm_betaselect()] -#' or an `lm_betaselect`-class object. +#' or an `lm_betaselect`-class object, +#' or the output of [glm_betaselect()] +#' or a `glm_betaselect`-class object. #' #' @param method The method used to #' compute the variance-covariance #' matrix. If bootstrapping was #' requested when calling -#' [lm_betaselect()] and this argument +#' [lm_betaselect()] or +#' [glm_betaselect()] and this argument #' is set to `"bootstrap"` or `"boot"`, #' the bootstrap variance-covariance #' matrix is returned. If bootstrapping #' was not requested or if this argument -#' is set to `"ls"`, then the usual `lm` +#' is set to `"ls"` or `"default"`, +#' then the usual `lm` or `glm` #' variance-covariance matrix is #' returned, with a warning raised #' unless `type` is `"raw"` or @@ -151,6 +183,7 @@ coef.lm_betaselect <- function(object, #' bootstrap = 100, #' iseed = 1234) #' vcov(lm_beta_x) +#' # A warning is expected for the following call #' vcov(lm_beta_x, method = "ls") #' vcov(lm_beta_x, type = "raw") #' @@ -159,7 +192,7 @@ coef.lm_betaselect <- function(object, # Adapted from vcov.std_selected() vcov.lm_betaselect <- function(object, - method = c("boot", "bootstrap", "ls"), + method = c("boot", "bootstrap", "ls", "default"), type = c("beta", "standardized", "raw", @@ -171,7 +204,8 @@ vcov.lm_betaselect <- function(object, method <- switch(method, boot = "boot", bootstrap = "boot", - ls = "ls") + ls = "ls", + default = "ls") type <- switch(type, beta = "beta", standardized = "beta", @@ -193,7 +227,7 @@ vcov.lm_betaselect <- function(object, } else { if (warn) { warning("With standardization, the variance-covariance matrix ", - "using OLS or WLS should not be used.") + "from 'lm()' or 'glm()' should not be used.") } NextMethod() } @@ -212,13 +246,39 @@ vcov.lm_betaselect <- function(object, } } -#' @title Confidence Interval for an -#' 'lm_betaselect'-Class Object +#' @examples +#' +#' data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +#' data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, +#' yes = 1, +#' no = 0) +#' # bootstrap should be set to 2000 or 5000 in real studies +#' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' data = data_test_mod_cat, +#' family = binomial, +#' to_standardize = "iv", +#' do_boot = TRUE, +#' bootstrap = 100, +#' iseed = 1234) +#' vcov(logistic_beta_x) +#' # A warning is expected for the following call +#' vcov(logistic_beta_x, method = "default") +#' vcov(logistic_beta_x, type = "raw") +#' +#' @rdname vcov.lm_betaselect +#' @export + +vcov.glm_betaselect <- vcov.lm_betaselect + +#' @title Confidence Interval for +#' 'lm_betaselect' or 'glm_betaselect' +#' Objects #' #' @description Return the confidence #' interval of the regression #' coefficients in the output of -#' [lm_betaselect()]. +#' [lm_betaselect()] or +#' [glm_betaselect()]. #' #' @details #' The type of @@ -227,7 +287,7 @@ vcov.lm_betaselect <- function(object, #' was requested, by default it returns #' the percentile bootstrap confidence #' intervals. Otherwise, it returns the -#' OLS (or WLS) confidence intervals +#' default confidence intervals #' and raises a warning for the #' standardized solution. #' @@ -241,7 +301,8 @@ vcov.lm_betaselect <- function(object, #' of coefficients. #' #' @param object The output of -#' [lm_betaselect()]. +#' [lm_betaselect()] or +#' [glm_betaselect()]. #' #' @param parm The terms for which #' the confidence intervals are returned. @@ -412,13 +473,151 @@ confint.lm_betaselect <- function(object, } } -#' @title ANOVA Tables for an -#' 'lm_betaselect'-Class Object +#' @param trace Logical. Whether profiling +#' will be traced. See +#' [stats::confint.glm()] for details. +#' ignored if `method` is `"boot"` or +#' `"bootstrap"`. +#' +#' @param test The test used for +#' profiling. See [stats::confint.glm] +#' for details. +#' ignored if `method` is `"boot"` or +#' `"bootstrap"`. +#' +#' @examples +#' +#' data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +#' data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, +#' yes = 1, +#' no = 0) +#' # bootstrap should be set to 2000 or 5000 in real studies +#' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' data = data_test_mod_cat, +#' family = binomial, +#' to_standardize = "iv", +#' do_boot = TRUE, +#' bootstrap = 100, +#' iseed = 1234) +#' +#' confint(logistic_beta_x, method = "default") +#' confint(logistic_beta_x, type = "raw") +#' +#' @rdname confint.lm_betaselect +#' @export + +# Code duplication is intentional +confint.glm_betaselect <- function(object, + parm, + level = .95, + trace = FALSE, + test = c("LRT", "Rao"), + method = c("boot", "bootstrap", "default", "ls"), + type = c("beta", + "standardized", + "raw", + "unstandardized"), + warn = TRUE, + boot_type = c("perc", "bc"), + ...) { + test <- match.arg(test) + method <- match.arg(method) + type <- match.arg(type) + boot_type <- match.arg(boot_type) + if (missing(parm)) { + parm <- stats::variable.names(object) + } + method <- switch(method, + boot = "boot", + bootstrap = "boot", + ls = "ls", + default = "ls") + type <- switch(type, + beta = "beta", + standardized = "beta", + raw = "raw", + unstandardized = "raw") + if (identical(method, "boot") && is.null(object$lm_betaselect$boot_out)) { + warning("Bootstrap estimates not available; ", + "'method' changed to 'ls' or 'default'.") + method <- "ls" + } + if (type == "beta") { + if (method == "boot") {1 + boot_out <- object$lm_betaselect$boot_out + boot_idx <- attr(boot_out, "boot_idx") + boot_est <- lapply(parm, function(y) { + out <- sapply(boot_out, function(x) { + x$coef_std[y] + }) + out + }) + est <- stats::coef(object, + type = type)[parm] + out <- mapply(boot_ci_internal, + t0 = est, + t = boot_est, + level = level, + boot_type = boot_type, + add_names = TRUE, + SIMPLIFY = FALSE) + out <- do.call(rbind, out) + return(out) + } else { + if (warn) { + warning("With standardization, the confidence interval", + "from 'lm()' or 'glm()' should not be used.") + } + class(object) <- "glm" + out <- stats::confint(object, + parm = parm, + level = level, + trace = trace, + test = test, + ...) + return(out) + } + } else { + if (method == "boot") { + boot_out <- object$lm_betaselect$boot_out + boot_idx <- attr(boot_out, "boot_idx") + boot_est <- lapply(parm, function(y) { + out <- sapply(boot_out, function(x) { + x$coef_ustd[y] + }) + out + }) + est <- stats::coef(object, + type = type)[parm] + out <- mapply(boot_ci_internal, + t0 = est, + t = boot_est, + level = level, + boot_type = boot_type, + add_names = TRUE, + SIMPLIFY = FALSE) + out <- do.call(rbind, out) + return(out) + } else { + out <- stats::confint(object$lm_betaselect$ustd, + parm = parm, + level = level, + trace = trace, + test = test) + return(out) + } + } + } + +#' @title ANOVA Tables For +#' 'lm_betaselect' and 'glm_betaselect' +#' Objects #' #' @description Return the analysis #' of variance tables for #' the outputs of -#' [lm_betaselect()]. +#' [lm_betaselect()] and +#' [glm_betaselect()]. #' #' @details #' By default, it calls [stats::anova()] @@ -435,10 +634,12 @@ confint.lm_betaselect <- function(object, #' structure. #' #' @param object The output of -#' [lm_betaselect()]. +#' [lm_betaselect()] or +#' [glm_betaselect()]. #' #' @param ... Additional outputs -#' of [lm_betaselect()]. +#' of [lm_betaselect()] or +#' [glm_betaselect()]. #' #' @param type String. If #' `"unstandardized"` or `"raw"`, the @@ -449,6 +650,15 @@ confint.lm_betaselect <- function(object, #' variables standardized are returned. #' Default is `"beta"`. #' +#' @param dispersion To be passed to +#' [stats::anova.glm()]. The dispersion +#' parameter. Default ia `NULL` and it +#' is extracted from the model. +#' +#' @param test String. The test to be +#' conducted. Please refer to +#' [stats::anova.glm()] for details. +#' #' @author Shu Fai Cheung #' #' @seealso [lm_betaselect()] @@ -489,6 +699,48 @@ anova.lm_betaselect <- function(object, } } +#' @examples +#' +#' data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +#' data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, +#' yes = 1, +#' no = 0) +#' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' data = data_test_mod_cat, +#' family = binomial, +#' to_standardize = "iv") +#' anova(logistic_beta_x) +#' anova(logistic_beta_x, type = "raw") +#' +#' @rdname anova.lm_betaselect +#' @export + +anova.glm_betaselect <- function(object, + ..., + type = c("beta", + "standardized", + "raw", + "unstandardized"), + dispersion = NULL, + test = NULL) { + type <- match.arg(type) + type <- switch(type, + beta = "beta", + standardized = "beta", + raw = "raw", + unstandardized = "raw") + if (type == "beta") { + type <- NULL + NextMethod() + } else { + objects <- c(list(object), list(...)) + ustds <- lapply(objects, function(x) x$lm_betaselect$ustd) + out <- do.call(stats::anova, + ustds) + return(out) + } + } + #' @title Summary of an #' 'lm_betaselect'-Class Object #' @@ -958,126 +1210,554 @@ print_fstatistic <- function(fstatistic, f_txt, ", p ", p_txt, "\n", sep = "") } -# #' @title Extract Log-Likelihood -# #' -# #' @description Extract the -# #' log-likelihood of an `lm_betaselect` -# #' object. -# #' -# #' @details -# #' It simply passes the model with -# #' or without selected variables -# #' standardized to the method -# #' [stats::logLik.lm()]. Please refer to -# #' the help page of [stats::logLik.lm()] -# #' for details. -# #' -# #' @return -# #' It returns an object of the class -# #' `logLik`, the same object returned -# #' by [stats::logLik.lm()]. -# #' -# #' @param object An `lm_betaselect`-class -# #' object. -# #' -# #' @param REML Whether the restricted -# #' log-likelihood is returned. Default -# #' is `FALSE`. -# #' -# #' @param type The model from which the -# #' log-likelihood is returned. For -# #' `"beta"` or `"standardized"`, the -# #' model is the one after selected -# #' variables standardized. For `"raw"` -# #' or `"unstandardized"`, the model is -# #' the one before standardization was -# #' done. -# #' -# #' @param ... Optional arguments. -# #' To be passed to [stats::logLik.lm()]. -# #' -# #' @author Shu Fai Cheung -# #' -# #' @seealso [lm_betaselect()] and [stats::logLik.lm()] -# #' -# #' @examples -# #' -# #' data(data_test_mod_cat) -# #' -# #' lm_beta_x <- lm_betaselect(dv ~ iv*mod + cov1 + cat1, -# #' data = data_test_mod_cat, -# #' to_standardize = "iv") -# #' logLik(lm_beta_x) -# #' logLik(lm_beta_x, type = "raw") -# #' -# #' @export - -# logLik.lm_betaselect <- function(object, -# REML = FALSE, -# type = c("beta", "standardized", -# "raw", "unstandardized"), -# ...) { -# type <- match.arg(type) -# type <- switch(type, -# beta = "beta", -# standardized = "beta", -# raw = "raw", -# unstandardized = "raw") -# if (type == "beta") { -# NextMethod(object = object, -# REML = REML, -# ...) -# } else { -# # type == "raw" -# stats::logLik(object = object$lm_betaselect$ustd, -# REML = REML, -# ...) -# } -# } - -# #' @title Extract AIC -# #' -# #' @description Extract the -# #' Akaike Information Criterion (AIC) -# #' from an `lm_betaselect` object. -# #' -# #' @details -# #' It simply passes the model with -# #' or without selected variables -# #' standardized to [stats::extractAIC()]. -# #' Please refer to -# #' the help page of [stats::extractAIC()] -# #' for details. -# #' -# #' @return -# #' It returns a numeric vector of -# #' two elements, which is simply the -# #' output of [stats::extractAIC()] -# #' on the requested model. -# #' -# #' @param fit An `lm_betaselect`-class -# #' object. -# #' -# #' @param scale To be passed -# #' to [stats::extractAIC()]. See its -# #' help page for details. -# #' -# #' @param k The weight of the -# #' equivalent degrees of freedom to be -# #' used in the computation of AIC. -# #' See [stats::extractAIC()] for details. -# #' -# #' @param type The model from which the -# #' AIC is returned. For -# #' `"beta"` or `"standardized"`, the -# #' model is the one after selected -# #' variables standardized. For `"raw"` -# #' or `"unstandardized"`, the model is -# #' the one before standardization was -# #' done. -# #' -# #' @param ... Optional arguments. -# #' To be passed to [stats::extractAIC()]. +#' @title Summary of an +#' 'glm_betaselect'-Class Object +#' +#' @description The `summary` method +#' for `glm_betaselect`-class objects. +#' +#' @details +#' By default, it returns a +#' `summary.glm_betaselect`-class object +#' for the results with selected variables +#' standardized. By setting `type` to +#' `"raw"` or `"unstandardized"`, it +#' returns the summary for the results +#' *before* standardization. +#' +#' @return +#' It returns an object of class +#' `summary.glm_betaselect`, which is +#' similar to the output of +#' [stats::summary.glm()], with additional +#' information on the standardization +#' and bootstrapping, if requested. +#' +#' @param object The output of +#' [glm_betaselect()]. +#' +#' @param dispersion The dispersion +#' parameter. If `NULL`, then it is +#' extracted from the object. If +#' a scalar, it will be used as +#' the dispersion parameter. See +#' [stats::summary.glm()] for details. +#' +#' @param correlation If `TRUE`, the +#' correlation matrix of the estimates +#' will be returned. The same argument +#' in [stats::summary.glm()]. Default +#' is `FALSE`. +#' +#' @param symbolic.cor If `TRUE`, +#' correlations are printed in symbolic +#' form as in [stats::summary.glm()]. +#' Default is `FALSE`. +#' +#' @param trace Logical. Whether +#' profiling will be traced when forming +#' the confidence interval if +#' `se_method` is `"default"`, `"z"`, or +#' `"glm"`. Ignored if `ci` is `FALSE`. +#' See [stats::confint.glm()] for +#' details. +#' +#' @param test The test used for +#' `se_method` is `"default"`, `"z"`, or +#' `"glm"`. Ignored if `ci` is `FALSE`. +#' See [stats::confint.glm()] for +#' details. +#' +#' @param se_method The method used to +#' compute the standard errors and +#' confidence intervals (if requested). +#' If bootstrapping was +#' requested when calling +#' [glm_betaselect()] and this argument +#' is set to `"bootstrap"` or `"boot"`, +#' the bootstrap standard errors are +#' returned. If bootstrapping +#' was not requested or if this argument +#' is set to `"z"`, `"glm"`, or `"default"`, +#' then the usual `glm` +#' standard errors are +#' returned, with a warning raised +#' unless `type` is `"raw"` or +#' `"unstandardized".` +#' Default is `"boot"`. +#' +#' @param ci Logical. Whether +#' confidence intervals are computed. +#' Default is `FALSE.` +#' +#' @param level The level of confidence, +#' default is .95, returning the 95% +#' confidence interval. +#' +#' @param boot_type The type of +#' bootstrap confidence intervals, +#' if requested. +#' Currently, it supports `"perc"`, +#' percentile bootstrap confidence +#' intervals, and `"bc"`, bias-corrected +#' bootstrap confidence interval. +#' +#' @param boot_pvalue_type The type +#' of *p*-values if `se_method` is +#' `"boot"` or `"bootstrap"`. If `"norm"`, +#' then the *z* score is used to compute +#' the *p*-value using a +#' standard normal distribution. +#' If `"asymmetric"`, the default, then +#' the method presented in +#' Asparouhov and Muthén (2021) is used +#' to compute the *p*-value based on the +#' bootstrap distribution. +#' +#' @param type String. If +#' `"unstandardized"` or `"raw"`, the +#' output *before* standardization +#' are used If `"beta"` or +#' `"standardized"`, then the +#' output *after* selected +#' variables standardized are returned. +#' Default is `"beta"`. +#' +#' @param ... Additional arguments +#' passed to other methods. +#' +#' @author Shu Fai Cheung +#' +#' @references +#' Asparouhov, A., & Muthén, B. (2021). Bootstrap p-value computation. +#' Retrieved from https://www.statmodel.com/download/FAQ-Bootstrap%20-%20Pvalue.pdf +#' +#' @seealso [glm_betaselect()] +#' +#' @examples +#' +#' data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +#' data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, +#' yes = 1, +#' no = 0) +#' # bootstrap should be set to 2000 or 5000 in real studies +#' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' data = data_test_mod_cat, +#' family = binomial, +#' to_standardize = "iv", +#' do_boot = TRUE, +#' bootstrap = 100, +#' iseed = 1234) +#' summary(logistic_beta_x) +#' +#' @rdname summary.glm_betaselect +#' +#' @export +# Code duplication intentional +summary.glm_betaselect <- function(object, + dispersion = NULL, + correlation = FALSE, + symbolic.cor = FALSE, + trace = FALSE, + test = c("LRT", "Rao"), + se_method = c("boot", "bootstrap", + "z", "glm", "default"), + ci = FALSE, + level = .95, + boot_type = c("perc", "bc"), + boot_pvalue_type = c("asymmetric", "norm"), + type = c("beta", + "standardized", + "raw", + "unstandardized"), + ...) { + se_method <- match.arg(se_method) + type <- match.arg(type) + se_method <- switch(se_method, + boot = "boot", + bootstrap = "boot", + z = "default", + glm = "default", + default = "default") + type <- switch(type, + beta = "beta", + standardized = "beta", + raw = "raw", + unstandardized = "raw") + boot_type <- match.arg(boot_type) + boot_pvalue_type <- match.arg(boot_pvalue_type) + if (identical(se_method, "boot") && is.null(object$lm_betaselect$boot_out)) { + warning("Bootstrap estimates not available; ", + "'se_method' changed to 'default'.") + se_method <- "default" + } + if (type == "beta") { + out <- NextMethod() + } else { + # type = "raw" + out <- stats::summary.glm(object = object$lm_betaselect$ustd, + dispersion = dispersion, + correlation = correlation, + symbolic.cor = symbolic.cor, + ...) + } + out$lm_betaselect$summary_call <- match.call() + out$lm_betaselect$call <- object$lm_betaselect$call + out$lm_betaselect$to_standardize <- object$lm_betaselect$to_standardize + out$lm_betaselect$se_method <- se_method + out$lm_betaselect$ci <- ci + out$lm_betaselect$level <- level + out$lm_betaselect$boot_type <- boot_type + out$lm_betaselect$type <- type + out$lm_betaselect$boot_pvalue_type <- boot_pvalue_type + class(out) <- c("summary.glm_betaselect", class(out)) + if (se_method == "boot") { + boot_out <- object$lm_betaselect$boot_out + out$lm_betaselect$bootstrap <- length(boot_out) + boot_est <- sapply(boot_out, function(x) { + x$coef_std + }) + boot_est_se <- apply(boot_est, 1, stats::sd, simplify = TRUE) + out$coefficients[, "Std. Error"] <- boot_est_se + z_values <- out$coefficients[, "Estimate"] / boot_est_se + out$coefficients[, "z value"] <- z_values + i <- which(colnames(out$coefficients) == "z value") + colnames(out$coefficients)[i] <- "z value" + if (boot_pvalue_type == "asymmetric") { + boot_est_list <- split(boot_est, rownames(boot_est)) + boot_pvalues <- sapply(boot_est_list, + est2p, + h0 = 0) + } else { + # boot_pvalue_type == "norm" + boot_pvalues <- stats::pnorm(abs(z_values), + lower.tail = FALSE) * 2 + } + out$coefficients[, "Pr(>|z|)"] <- boot_pvalues + i <- which(colnames(out$coefficients) == "Pr(>|z|)") + colnames(out$coefficients)[i] <- switch(boot_pvalue_type, + asymmetric = "Pr(Boot)", + norm = "Pr(>|z|)") + } else { + # se_method == "default" + # No need to change + } + if (ci) { + out_ci <- confint.glm_betaselect(object, + level = level, + trace = trace, + test = test, + method = se_method, + type = type, + warn = FALSE, + boot_type = boot_type) + colnames(out_ci) <- c("CI.Lower", "CI.Upper") + i <- which(colnames(out$coefficients) == "Estimate") + out_coef <- out$coefficients + out_coef <- cbind(out_coef[, seq_len(i), drop = FALSE], + out_ci, + out_coef[, -seq_len(i), drop = FALSE]) + out$coefficients <- out_coef + } + out + } + +#' @details +#' The `print` method of +#' `summary.glm_betaselect`-class objects +#' is adapted from +#' [stdmod::print.summary.std_selected()]. +#' +#' @return +#' The `print`-method of +#' `summary.glm_betaselect` is called +#' for its side effect. The object `x` +#' is returned invisibly. +#' +#' @param x The output of +#' [summary.glm_betaselect()]. +#' +#' @param est_digits The number of +#' digits after the decimal to be +#' displayed for the coefficient +#' estimates, their standard errors, and +#' confidence intervals (if present). +#' Note that the values will be rounded +#' to this number of digits before +#' printing. If all digits at this +#' position are zero for all values, the +#' values may be displayed with fewer +#' digits. Note that the coefficient +#' table is printed by +#' [stats::printCoefmat()]. If some +#' numbers are vary large, the number of +#' digits after the decimal may be +#' smaller than `est_digits` due to a +#' limit on the column width. +#' +#' @param signif.stars Whether "stars" +#' (asterisks) are printed to denote +#' the level of significance achieved +#' for each coefficient. Default is +#' `TRUE`. +#' +#' @param z_digits The number of digits +#' after the decimal to be displayed for +#' the *z* or similar statistic (in the +#' column `"z value"`). +#' +#' @param show.residuals If `TRUE`, +#' a summary of the deviance residuals +#' will be printed. Default is `FALSE`. +#' +#' @param pvalue_less_than If a +#' *p*-value is less than this value, it +#' will be displayed with `"<(this +#' value)".` For example, if +#' `pvalue_less_than` is .001, the +#' default, *p*-values less than .001 +#' will be displayed as `<.001`. This +#' value also determines the printout of +#' the *p*-value of the *F* statistic. +#' (This argument does what `eps.Pvalue` +#' does in [stats::printCoefmat()].) +#' +#' +#' @rdname summary.glm_betaselect +#' +#' @export +# Code duplication intentional +print.summary.glm_betaselect <- function(x, + est_digits = 3, + symbolic.cor = x$symbolic.cor, + signif.stars = getOption("show.signif.stars"), + show.residuals = FALSE, + z_digits = 3, + pvalue_less_than = .001, + ...) { + cat("Call to glm_betaselect():\n") + print(x$lm_betaselect$call) + to_standardize <- x$lm_betaselect$to_standardize + type <- x$lm_betaselect$type + level <- x$lm_betaselect$level + level_str <- paste0(formatC(level * 100, digits = 1, + format = "f"), + "%") + if (length(to_standardize) > 0) { + tmp <- paste(to_standardize, collapse = ", ") + tmp <- strwrap(tmp) + } else { + tmp <- "[Nil]" + } + cat("\nVariable(s) standardized:", + tmp, "\n") + + x$coefficients[, "Estimate"] <- round(x$coefficients[, "Estimate"], est_digits) + x$coefficients[, "Std. Error"] <- round(x$coefficients[, "Std. Error"], est_digits) + if (x$lm_betaselect$ci) { + x$coefficients[, "CI.Lower"] <- round(x$coefficients[, "CI.Lower"], est_digits) + x$coefficients[, "CI.Upper"] <- round(x$coefficients[, "CI.Upper"], est_digits) + } + i <- match(c("t value", "z value"), colnames(x$coefficients)) + i <- i[!is.na(i)] + x$coefficients[, i] <- round(x$coefficients[, i], z_digits) + NextMethod(digits = est_digits, + eps.Pvalue = pvalue_less_than, + dig.tst = z_digits) + + cat("\n") + + tmp <- character(0) + tmp <- c(tmp, "Note:") + tmp <- c(tmp, + strwrap(switch(type, + beta = "- Results *after* standardization are reported.", + raw = "- Results *before* standardization are reported."), + exdent = 2)) + if (x$lm_betaselect$se_method == "boot") { + tmp <- c(tmp, + strwrap("- Nonparametric bootstrapping conducted.", + exdent = 2)) + tmp <- c(tmp, + strwrap(paste0("- The number of bootstrap samples is ", + x$lm_betaselect$bootstrap, "."), + exdent = 2)) + tmp <- c(tmp, + strwrap("- Standard errors are bootstrap standard errors.", + exdent = 2)) + tmp <- c(tmp, + strwrap("- Z values are computed by 'Estimate / Std. Error'.", + exdent = 2)) + tmp <- c(tmp, + strwrap(switch(x$lm_betaselect$boot_pvalue_type, + asymmetric = "- The bootstrap p-values are asymmetric p-values by Asparouhov and Muth\u00e9n (2021).", + norm = "- The bootstrap p-values are based on standard normal distribution using z values."), + exdent = 2)) + if (x$lm_betaselect$ci) { + boot_type_str <- switch(x$lm_betaselect$boot_type, + perc = "Percentile", + bc = "Bias-corrected") + tmp <- c(tmp, + strwrap(paste0("- ", + boot_type_str, + " bootstrap ", + level_str, + " confidence interval reported."), + exdent = 2)) + } + } else { + # se_method == "ls" + tmp <- c(tmp, + strwrap("- Standard errors are least-squares standard errors.", + exdent = 2)) + tmp <- c(tmp, + strwrap("- Z values are computed by 'Estimate / Std. Error'.", + exdent = 2)) + tmp <- c(tmp, + strwrap("- P-values are usual z-test p-values.", + exdent = 2)) + if ((length(to_standardize) > 0) && + type == "beta") { + tmp <- c(tmp, + strwrap(paste0("- Default standard errors, z values, p-values, and confidence intervals (if reported) ", + "should not be used for coefficients involved in standardization."), + exdent = 2)) + } + if (x$lm_betaselect$ci) { + tmp <- c(tmp, + strwrap(paste0("- ", + "Default ", + level_str, + " confidence interval reported."), + exdent = 2)) + } + } + cat(tmp, sep = "\n") + invisible(x) + } + + + +# #' @title Extract Log-Likelihood +# #' +# #' @description Extract the +# #' log-likelihood of an `lm_betaselect` +# #' object. +# #' +# #' @details +# #' It simply passes the model with +# #' or without selected variables +# #' standardized to the method +# #' [stats::logLik.lm()]. Please refer to +# #' the help page of [stats::logLik.lm()] +# #' for details. +# #' +# #' @return +# #' It returns an object of the class +# #' `logLik`, the same object returned +# #' by [stats::logLik.lm()]. +# #' +# #' @param object An `lm_betaselect`-class +# #' object. +# #' +# #' @param REML Whether the restricted +# #' log-likelihood is returned. Default +# #' is `FALSE`. +# #' +# #' @param type The model from which the +# #' log-likelihood is returned. For +# #' `"beta"` or `"standardized"`, the +# #' model is the one after selected +# #' variables standardized. For `"raw"` +# #' or `"unstandardized"`, the model is +# #' the one before standardization was +# #' done. +# #' +# #' @param ... Optional arguments. +# #' To be passed to [stats::logLik.lm()]. +# #' +# #' @author Shu Fai Cheung +# #' +# #' @seealso [lm_betaselect()] and [stats::logLik.lm()] +# #' +# #' @examples +# #' +# #' data(data_test_mod_cat) +# #' +# #' lm_beta_x <- lm_betaselect(dv ~ iv*mod + cov1 + cat1, +# #' data = data_test_mod_cat, +# #' to_standardize = "iv") +# #' logLik(lm_beta_x) +# #' logLik(lm_beta_x, type = "raw") +# #' +# #' @export + +# logLik.lm_betaselect <- function(object, +# REML = FALSE, +# type = c("beta", "standardized", +# "raw", "unstandardized"), +# ...) { +# type <- match.arg(type) +# type <- switch(type, +# beta = "beta", +# standardized = "beta", +# raw = "raw", +# unstandardized = "raw") +# if (type == "beta") { +# NextMethod(object = object, +# REML = REML, +# ...) +# } else { +# # type == "raw" +# stats::logLik(object = object$lm_betaselect$ustd, +# REML = REML, +# ...) +# } +# } + +# #' @title Extract AIC +# #' +# #' @description Extract the +# #' Akaike Information Criterion (AIC) +# #' from an `lm_betaselect` object. +# #' +# #' @details +# #' It simply passes the model with +# #' or without selected variables +# #' standardized to [stats::extractAIC()]. +# #' Please refer to +# #' the help page of [stats::extractAIC()] +# #' for details. +# #' +# #' @return +# #' It returns a numeric vector of +# #' two elements, which is simply the +# #' output of [stats::extractAIC()] +# #' on the requested model. +# #' +# #' @param fit An `lm_betaselect`-class +# #' object. +# #' +# #' @param scale To be passed +# #' to [stats::extractAIC()]. See its +# #' help page for details. +# #' +# #' @param k The weight of the +# #' equivalent degrees of freedom to be +# #' used in the computation of AIC. +# #' See [stats::extractAIC()] for details. +# #' +# #' @param type The model from which the +# #' AIC is returned. For +# #' `"beta"` or `"standardized"`, the +# #' model is the one after selected +# #' variables standardized. For `"raw"` +# #' or `"unstandardized"`, the model is +# #' the one before standardization was +# #' done. +# #' +# #' @param ... Optional arguments. +# #' To be passed to [stats::extractAIC()]. # #' # #' @author Shu Fai Cheung # #' @@ -1441,22 +2121,125 @@ predict.lm_betaselect <- function(object, } } +#' @title Predict Method for a 'glm_betaselect' Object +#' +#' @description Compute the predicted +#' values in a model fitted by +#' [glm_betaselect()]. +#' +#' @details +#' It simply passes the model *before* +#' or *after* selected variables +#' are standardized to the +#' `predict`-method of a `glm` object. +#' +#' ## IMPORTANT +#' +#' Some statistics, such as prediction +#' or confidence interval, which make use +#' of the sampling variances and +#' covariances of coefficient estimates +#' *may* not be applicable to the +#' models with one or more variables +#' standardized. Therefore, they should +#' only be used for exploratory purpose. +#' +#' @return +#' It returns the output of [stats::predict.glm()]. +#' +#' @param object A `glm_betaselect`-class +#' object. +#' +#' @param model_type The model from which the +#' the predicted values are computed. +#' For +#' `"beta"` or `"standardized"`, the +#' model is the one after selected +#' variables standardized. For `"raw"` +#' or `"unstandardized"`, the model is +#' the one before standardization was +#' done. +#' +#' @param newdata If set to a data +#' frame, the predicted values are +#' computed using this data frame. +#' The data must be unstandardized. +#' That is, the variables are of the +#' same units as in the data frame +#' used in [glm_betaselect()]. If +#' `model_type` is `"beta"` or +#' `"standardized"`, it will be +#' standardized using the setting +#' of `to_standardize` when `object` +#' is created in [glm_betaselect()]. +#' +#' @param ... Arguments +#' to be passed to [stats::predict.glm()]. +#' Please refer to the help page of +#' [stats::predict.glm()]. +#' +#' @author Shu Fai Cheung +#' +#' @seealso [glm_betaselect()] and [stats::predict.glm()] +#' +#' @examples +#' +#' data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +#' data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, +#' yes = 1, +#' no = 0) +#' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, +#' data = data_test_mod_cat, +#' family = binomial, +#' to_standardize = "iv") +#' +#' predict(logistic_beta_x) +#' predict(logistic_beta_x, model_type = "raw") +#' +#' @export + +predict.glm_betaselect <- function(object, + model_type = c("beta", "standardized", + "raw", "unstandardized"), + newdata, + ...) { + model_type <- match.arg(model_type) + model_type <- switch(model_type, + beta = "beta", + standardized = "beta", + raw = "raw", + unstandardized = "raw") + to_standardize <- object$lm_betaselect$to_standardize + if (model_type == "beta") { + if (!missing(newdata)) { + newdata_std <- std_data(newdata, + to_standardize = to_standardize) + NextMethod(newdata = newdata_std) + } else { + NextMethod() + } + } else { + object <- object$lm_betaselect$ustd + NextMethod() + } + } + # #' @title Update and Re-fit a Call to -# #' 'lm_betaselect()' +# #' 'glm_betaselect()' # #' # #' @description The `update`-method -# #' for an `lm_betaselect`-class objects. +# #' for a `glm_betaselect`-class objects. # #' # #' @details This works in the same way # #' the default `update`-method does for -# #' the output of [stats::lm()]. +# #' the output of [stats::glm()]. # #' # #' @return # #' It returns the output of -# #' [lm_betaselect()] with the updated +# #' [glm_betaselect()] with the updated # #' call, such as the updated model. # #' -# #' @param object An `lm_betaselect`-class +# #' @param object An `glm_betaselect`-class # #' object. # #' # #' @param formula. Changes to the formula, @@ -1466,7 +2249,7 @@ predict.lm_betaselect <- function(object, # #' additional arguments # #' to the call, as in the default # #' [update()] method. Ignored by -# #' [getCall.lm_betaselect()]. +# #' [getCall.glm_betaselect()]. # #' # #' @param evaluate Whether the updated # #' call will be evaluated. Default is @@ -1474,25 +2257,29 @@ predict.lm_betaselect <- function(object, # #' # #' @author Shu Fai Cheung # #' -# #' @seealso [lm_betaselect()] and [stats::update()] +# #' @seealso [glm_betaselect()] and [stats::update()] # #' # #' @examples # #' # #' data(data_test_mod_cat) # #' -# #' lm_beta_x <- lm_betaselect(dv ~ iv*mod + cov1, -# #' data = data_test_mod_cat, -# #' to_standardize = "iv") -# #' summary(lm_beta_x) -# #' lm_beta_x2 <- update(lm_beta_x, ~ . + cat1) -# #' summary(lm_beta_x2) +# #' data_test_mod_cat$p <- ifelse(data_test_mod_cat$dv > +# #' mean(data_test_mod_cat$dv), +# #' yes = 1, +# #' no = 0) +# #' logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1, +# #' data = data_test_mod_cat, +# #' to_standardize = "iv") +# #' summary(logistic_beta_x) +# #' logistic_beta_x2 <- update(logistic_beta_x, ~ . + cat1) +# #' summary(logistic_beta_x) # #' # #' @export -# update.lm_betaselect <- function(object, -# formula., -# ..., -# evaluate = TRUE) { +# update.glm_betaselect <- function(object, +# formula., +# ..., +# evaluate = TRUE) { # # Adapted from the default update # # By default, get the call to lm_betaselect() # # call <- object$lm_betaselect$call @@ -1513,27 +2300,32 @@ predict.lm_betaselect <- function(object, # # else call # } -#' @title Extract the Call From an -#' 'lm_betaselect' Object +#' @title Call in an +#' 'lm_betaselect' or 'glm_betaselect' +#' Object #' #' @description The `getCall`-method -#' for an `lm_betaselect`-class objects. +#' for an `lm_betaselect`-class or +#' `glm_betaselectd`-class objects. #' #' @details This works in the same way #' the default `getCall`-method does for -#' the output of [stats::lm()]. +#' the outputs of [stats::lm()] +#' and [stats::glm()]. #' #' @return #' It returns the call requested. #' - #' @param x An `lm_betaselect`-class +#' or `glm_betaselect`-class #' object from which the call is to #' be extracted. #' #' @param what Which call to extract. -#' For `"lm_betaselect"`, the call -#' to [lm_betaselect()] is extracted. +#' For `"lm_betaselect"` or +#' `"glm_betaselect"` the call +#' to [lm_betaselect()] +#' or [glm_betaselect()] is extracted. #' For #' `"beta"` or `"standardized"`, the #' call used to fit the model *after* @@ -1549,7 +2341,8 @@ predict.lm_betaselect <- function(object, #' #' @author Shu Fai Cheung #' -#' @seealso [lm_betaselect()] and [stats::getCall()] +#' @seealso [lm_betaselect()], +#' [glm_betaselect()], and [stats::getCall()] #' #' @examples #' @@ -1584,4 +2377,28 @@ getCall.lm_betaselect <- function(x, beta = x$call, raw = x$lm_betaselect$ustd$call) return(out) - } \ No newline at end of file + } + +#' @rdname getCall.lm_betaselect +#' @export + +getCall.glm_betaselect <- function(x, + what = c("glm_betaselect", + "beta", + "standardized", + "raw", + "unstandardized"), + ...) { + what <- match.arg(what) + what <- switch(what, + glm_betaselect = "lm_betaselect", + beta = "beta", + standardized = "beta", + raw = "raw", + unstandardized = "raw") + out <- switch(what, + lm_betaselect = x$lm_betaselect$call, + beta = x$call, + raw = x$lm_betaselect$ustd$call) + return(out) + } diff --git a/README.md b/README.md index ed9fc79..bedbde5 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Not ready for use. # betaselectr: Do selective standardization in structural equation models and regression models -(Version 0.0.1.3, updated on 2024-06-26, [release history](https://sfcheung.github.io/betaselectr/news/index.html)) +(Version 0.0.1.4, updated on 2024-06-27, [release history](https://sfcheung.github.io/betaselectr/news/index.html)) It computes Beta_Select, standardization in structural equation models with only diff --git a/_pkgdown.yml b/_pkgdown.yml index 202e79c..36977dc 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -52,6 +52,8 @@ reference: - predict.lm_betaselect - summary.lm_betaselect - vcov.lm_betaselect + - predict.glm_betaselect + - summary.glm_betaselect - title: Datasets desc: Datasets used in examples. diff --git a/man/anova.lm_betaselect.Rd b/man/anova.lm_betaselect.Rd index b1f7069..0683e76 100644 --- a/man/anova.lm_betaselect.Rd +++ b/man/anova.lm_betaselect.Rd @@ -2,17 +2,29 @@ % Please edit documentation in R/lm_betaselect_methods.R \name{anova.lm_betaselect} \alias{anova.lm_betaselect} -\title{ANOVA Tables for an -'lm_betaselect'-Class Object} +\alias{anova.glm_betaselect} +\title{ANOVA Tables For +'lm_betaselect' and 'glm_betaselect' +Objects} \usage{ \method{anova}{lm_betaselect}(object, ..., type = c("beta", "standardized", "raw", "unstandardized")) + +\method{anova}{glm_betaselect}( + object, + ..., + type = c("beta", "standardized", "raw", "unstandardized"), + dispersion = NULL, + test = NULL +) } \arguments{ \item{object}{The output of -\code{\link[=lm_betaselect]{lm_betaselect()}}.} +\code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}}.} \item{...}{Additional outputs -of \code{\link[=lm_betaselect]{lm_betaselect()}}.} +of \code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}}.} \item{type}{String. If \code{"unstandardized"} or \code{"raw"}, the @@ -22,6 +34,15 @@ are used If \code{"beta"} or output \emph{after} selected variables standardized are returned. Default is \code{"beta"}.} + +\item{dispersion}{To be passed to +\code{\link[stats:anova.glm]{stats::anova.glm()}}. The dispersion +parameter. Default ia \code{NULL} and it +is extracted from the model.} + +\item{test}{String. The test to be +conducted. Please refer to +\code{\link[stats:anova.glm]{stats::anova.glm()}} for details.} } \value{ It returns an object of class @@ -33,7 +54,8 @@ structure. Return the analysis of variance tables for the outputs of -\code{\link[=lm_betaselect]{lm_betaselect()}}. +\code{\link[=lm_betaselect]{lm_betaselect()}} and +\code{\link[=glm_betaselect]{glm_betaselect()}}. } \details{ By default, it calls \code{\link[stats:anova]{stats::anova()}} @@ -54,6 +76,18 @@ lm_beta_x <- lm_betaselect(dv ~ iv*mod + cov1 + cat1, anova(lm_beta_x) anova(lm_beta_x, type = "raw") + +data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, + yes = 1, + no = 0) +logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + data = data_test_mod_cat, + family = binomial, + to_standardize = "iv") +anova(logistic_beta_x) +anova(logistic_beta_x, type = "raw") + } \seealso{ \code{\link[=lm_betaselect]{lm_betaselect()}} diff --git a/man/coef.lm_betaselect.Rd b/man/coef.lm_betaselect.Rd index 54cbc13..40c8cf1 100644 --- a/man/coef.lm_betaselect.Rd +++ b/man/coef.lm_betaselect.Rd @@ -2,8 +2,9 @@ % Please edit documentation in R/lm_betaselect_methods.R \name{coef.lm_betaselect} \alias{coef.lm_betaselect} -\title{Coefficients of an -'lm_betaselect'-Class Object} +\alias{coef.glm_betaselect} +\title{Coefficients of +Beta-Select in Linear Models} \usage{ \method{coef}{lm_betaselect}( object, @@ -11,11 +12,20 @@ type = c("beta", "standardized", "raw", "unstandardized"), ... ) + +\method{coef}{glm_betaselect}( + object, + complete = FALSE, + type = c("beta", "standardized", "raw", "unstandardized"), + ... +) } \arguments{ \item{object}{The output of -\code{\link[=lm_betaselect]{lm_betaselect()}}, or an -\code{lm_betaselect}-class object.} +\code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}}, or an +\code{lm_betaselect}-class or +\code{glm_betaselect}-class object.} \item{complete}{If \code{TRUE}, it returns the full vector of coefficients, @@ -41,7 +51,8 @@ regression coefficients. \description{ Return the estimates of coefficients in an -\code{lm_betaselect}-class object. +\code{lm_betaselect}-class or +\code{glm_betaselect}-class object. } \details{ By default, it extracts the @@ -62,9 +73,22 @@ lm_beta_x <- lm_betaselect(dv ~ iv*mod + cov1 + cat1, coef(lm_beta_x) coef(lm_beta_x, type = "raw") + +data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, + yes = 1, + no = 0) +logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + data = data_test_mod_cat, + family = binomial, + to_standardize = "iv") +coef(logistic_beta_x) +coef(logistic_beta_x, type = "raw") + } \seealso{ -\code{\link[=lm_betaselect]{lm_betaselect()}} +\code{\link[=lm_betaselect]{lm_betaselect()}} and +\code{\link[=glm_betaselect]{glm_betaselect()}} } \author{ Shu Fai Cheung \url{https://orcid.org/0000-0002-9871-9448} diff --git a/man/confint.lm_betaselect.Rd b/man/confint.lm_betaselect.Rd index ce7c358..6cb3bb0 100644 --- a/man/confint.lm_betaselect.Rd +++ b/man/confint.lm_betaselect.Rd @@ -2,8 +2,10 @@ % Please edit documentation in R/lm_betaselect_methods.R \name{confint.lm_betaselect} \alias{confint.lm_betaselect} -\title{Confidence Interval for an -'lm_betaselect'-Class Object} +\alias{confint.glm_betaselect} +\title{Confidence Interval for +'lm_betaselect' or 'glm_betaselect' +Objects} \usage{ \method{confint}{lm_betaselect}( object, @@ -15,10 +17,24 @@ boot_type = c("perc", "bc"), ... ) + +\method{confint}{glm_betaselect}( + object, + parm, + level = 0.95, + trace = FALSE, + test = c("LRT", "Rao"), + method = c("boot", "bootstrap", "default", "ls"), + type = c("beta", "standardized", "raw", "unstandardized"), + warn = TRUE, + boot_type = c("perc", "bc"), + ... +) } \arguments{ \item{object}{The output of -\code{\link[=lm_betaselect]{lm_betaselect()}}.} +\code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}}.} \item{parm}{The terms for which the confidence intervals are returned. @@ -73,6 +89,18 @@ bootstrap confidence interval.} \item{...}{Optional arguments. Ignored.} + +\item{trace}{Logical. Whether profiling +will be traced. See +\code{\link[stats:confint]{stats::confint.glm()}} for details. +ignored if \code{method} is \code{"boot"} or +\code{"bootstrap"}.} + +\item{test}{The test used for +profiling. See \link[stats:confint]{stats::confint.glm} +for details. +ignored if \code{method} is \code{"boot"} or +\code{"bootstrap"}.} } \value{ A \emph{p} by 2 matrix of the confidence @@ -83,7 +111,8 @@ of coefficients. Return the confidence interval of the regression coefficients in the output of -\code{\link[=lm_betaselect]{lm_betaselect()}}. +\code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}}. } \details{ The type of @@ -92,7 +121,7 @@ on the object. If bootstrapping was requested, by default it returns the percentile bootstrap confidence intervals. Otherwise, it returns the -OLS (or WLS) confidence intervals +default confidence intervals and raises a warning for the standardized solution. @@ -115,6 +144,23 @@ confint(lm_beta_x) confint(lm_beta_x, method = "ls") confint(lm_beta_x, type = "raw") + +data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, + yes = 1, + no = 0) +# bootstrap should be set to 2000 or 5000 in real studies +logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + data = data_test_mod_cat, + family = binomial, + to_standardize = "iv", + do_boot = TRUE, + bootstrap = 100, + iseed = 1234) + +confint(logistic_beta_x, method = "default") +confint(logistic_beta_x, type = "raw") + } \seealso{ \code{\link[=lm_betaselect]{lm_betaselect()}} diff --git a/man/getCall.lm_betaselect.Rd b/man/getCall.lm_betaselect.Rd index e433b38..62da955 100644 --- a/man/getCall.lm_betaselect.Rd +++ b/man/getCall.lm_betaselect.Rd @@ -2,23 +2,34 @@ % Please edit documentation in R/lm_betaselect_methods.R \name{getCall.lm_betaselect} \alias{getCall.lm_betaselect} -\title{Extract the Call From an -'lm_betaselect' Object} +\alias{getCall.glm_betaselect} +\title{Call in an +'lm_betaselect' or 'glm_betaselect' +Object} \usage{ \method{getCall}{lm_betaselect}( x, what = c("lm_betaselect", "beta", "standardized", "raw", "unstandardized"), ... ) + +\method{getCall}{glm_betaselect}( + x, + what = c("glm_betaselect", "beta", "standardized", "raw", "unstandardized"), + ... +) } \arguments{ \item{x}{An \code{lm_betaselect}-class +or \code{glm_betaselect}-class object from which the call is to be extracted.} \item{what}{Which call to extract. -For \code{"lm_betaselect"}, the call -to \code{\link[=lm_betaselect]{lm_betaselect()}} is extracted. +For \code{"lm_betaselect"} or +\code{"glm_betaselect"} the call +to \code{\link[=lm_betaselect]{lm_betaselect()}} +or \code{\link[=glm_betaselect]{glm_betaselect()}} is extracted. For \code{"beta"} or \code{"standardized"}, the call used to fit the model \emph{after} @@ -37,12 +48,14 @@ It returns the call requested. } \description{ The \code{getCall}-method -for an \code{lm_betaselect}-class objects. +for an \code{lm_betaselect}-class or +\code{glm_betaselectd}-class objects. } \details{ This works in the same way the default \code{getCall}-method does for -the output of \code{\link[stats:lm]{stats::lm()}}. +the outputs of \code{\link[stats:lm]{stats::lm()}} +and \code{\link[stats:glm]{stats::glm()}}. } \examples{ @@ -57,7 +70,8 @@ getCall(lm_beta_x, what = "raw") } \seealso{ -\code{\link[=lm_betaselect]{lm_betaselect()}} and \code{\link[stats:update]{stats::getCall()}} +\code{\link[=lm_betaselect]{lm_betaselect()}}, +\code{\link[=glm_betaselect]{glm_betaselect()}}, and \code{\link[stats:update]{stats::getCall()}} } \author{ Shu Fai Cheung \url{https://orcid.org/0000-0002-9871-9448} diff --git a/man/lm_betaselect.Rd b/man/lm_betaselect.Rd index 4f7120f..90fcda2 100644 --- a/man/lm_betaselect.Rd +++ b/man/lm_betaselect.Rd @@ -2,12 +2,28 @@ % Please edit documentation in R/lm_betaselect.R \name{lm_betaselect} \alias{lm_betaselect} +\alias{glm_betaselect} \alias{print.lm_betaselect} +\alias{print.glm_betaselect} \alias{raw_output} \title{Standardize Coefficients in a Regression Model} \usage{ lm_betaselect( + ..., + to_standardize = NULL, + not_to_standardize = NULL, + do_boot = TRUE, + bootstrap = 100L, + iseed = NULL, + parallel = FALSE, + ncpus = parallel::detectCores(logical = FALSE) - 1, + progress = TRUE, + load_balancing = TRUE, + model_call = c("lm", "glm") +) + +glm_betaselect( ..., to_standardize = NULL, not_to_standardize = NULL, @@ -27,13 +43,24 @@ lm_betaselect( ... ) +\method{print}{glm_betaselect}( + x, + digits = max(3L, getOption("digits") - 3L), + type = c("beta", "standardized", "raw", "unstandardized"), + ... +) + raw_output(x) } \arguments{ -\item{...}{For \code{\link[=lm_betaselect]{lm_betaselect()}}, +\item{...}{For \code{\link[=lm_betaselect]{lm_betaselect()}}. these arguments will be passed directly to \code{\link[=lm]{lm()}}. For +\code{\link[=glm_betaselect]{glm_betaselect()}}, these arguments +will be passed to \code{\link[=glm]{glm()}}. +For the \code{print}-method of \code{lm_betaselect} +or \code{glm_betaselect} objects, this will be passed to other methods.} @@ -73,8 +100,8 @@ generator. Default is \code{NULL}.} \code{TRUE} and this argument is \code{TRUE}, parallel processing will be used to do bootstrapping. Default is \code{FALSE} -because bootstrapping in regression -by \code{\link[=lm]{lm()}} is rarely slow.} +because bootstrapping for models fitted +by \code{\link[=lm]{lm()}} or \code{\link[=glm]{glm()}} is rarely slow.} \item{ncpus}{If \code{do_boot} is \code{TRUE} and \code{parallel} is also \code{TRUE}, this @@ -92,7 +119,17 @@ for long process.} whether load balancing will be used. Default is \code{TRUE}.} -\item{x}{A \code{lm_betaselect}-class object.} +\item{model_call}{The model function +to be called. +If \code{"lm"}, the default, the model will be fitted +by \code{\link[stats:lm]{stats::lm()}}. If \code{"glm"}, the +model will be fitted by \code{\link[stats:glm]{stats::glm()}}. +Users should call the corresponding +function directly rather than setting +this argument manually.} + +\item{x}{An \code{lm_betaselect} or +\code{glm_betaselect} object.} \item{digits}{The number of significant digits to be printed for the @@ -110,14 +147,19 @@ be printed.} \value{ The function \code{\link[=lm_betaselect]{lm_betaselect()}} returns an object of the class \code{lm_betaselect}, -which is similar to the output -of \code{\link[=lm]{lm()}}, with additional information -stored. +The function \code{\link[=glm_betaselect]{glm_betaselect()}} +returns an object of the class +\code{glm_betaselect}. They are similar +in structure to the output of +\code{\link[stats:lm]{stats::lm()}} and \code{\link[stats:glm]{stats::glm()}}, +with additional information stored. The function \code{\link[=raw_output]{raw_output()}} returns -an object of the class \code{lm}, which are +an object of the class \code{lm} or +\code{glm}, which are the results of fitting the model -to the data by \code{\link[stats:lm]{stats::lm()}} without +to the data by \code{\link[stats:lm]{stats::lm()}} +or \code{\link[stats:glm]{stats::glm()}} without standardization. } \description{ @@ -128,21 +170,24 @@ skip categorical predictors in standardization. } \details{ -The function \code{\link[=lm_betaselect]{lm_betaselect()}} -lets users +The functions \code{\link[=lm_betaselect]{lm_betaselect()}} +and \code{\link[=glm_betaselect]{glm_betaselect()}} +let users select which variables to be standardized when computing the -standardized solution. It has the +standardized solution. They have the following features: \itemize{ -\item It automatically skips categorical +\item They automatically skip categorical predictors (i.e., factor or string variables). -\item It does not standardize product -term, which is incorrect. Instead, it +\item They do not standardize a product +term, which is incorrect. Instead, +they compute the product term with its -component variables standardized. -\item It standardizes the selected +component variables standardized, +if requested. +\item They standardize the selected variables \emph{before} fitting a model. Therefore, If a model has the term \code{log(x)} and \code{x} is one of the @@ -151,20 +196,22 @@ the logarithm of the \emph{standardized} \code{x} in the model, instead of standardized \code{log(x)} which is difficult to interpret. -\item It can be used to generate +\item They can be used to generate nonparametric bootstrap confidence intervals for the standardized solution. Bootstrap confidence interval is better than -the least square confidence interval +the default confidence interval ignoring the standardization because it takes into account the sampling variance of the standard deviations. -It is one of the recommended methods +Preliminary support for bootstrap +confidence has been found for forming confidence intervals for coefficients involving standardized -variables (Jones & Waller, 2013). +variables in linear regression +(Jones & Waller, 2013). } \subsection{Problems With Common Approaches}{ @@ -191,13 +238,13 @@ they are standardized (e.g., age). } } -\subsection{How It Works}{ +\subsection{How The Function Work}{ -It standardizes the original variables +They standardize the original variables \emph{before} they are used in the -regression model. Therefore, strictly -speaking, it does not standardize -the predictors in a regression model, +model. Therefore, strictly +speaking, they do not standardize +the predictors in model, but standardize the \emph{input variable} (Gelman et al., 2021). @@ -216,37 +263,50 @@ by \code{\link[=raw_output]{raw_output()}}. \subsection{Methods}{ The output of \code{\link[=lm_betaselect]{lm_betaselect()}} is -an \code{lm_betaselect}-class object. -It has the following methods:add +an \code{lm_betaselect}-class object, +and the output of \code{\link[=glm_betaselect]{glm_betaselect()}} +is a \code{glm_betaselect}-class object. +They have the following methods: \itemize{ \item A \code{coef}-method for extracting the coefficients of the model. -(See \code{\link[=coef.lm_betaselect]{coef.lm_betaselect()}} for details.) +(See \code{\link[=coef.lm_betaselect]{coef.lm_betaselect()}} +and \code{\link[=coef.glm_betaselect]{coef.glm_betaselect()}} +for details.) \item A \code{vcov}-method for extracting the variance-covariance matrix of the estimates of the coefficients. If bootstrapping is requested, it can return the matrix based on the bootstrapping estimates. -(See \code{\link[=vcov.lm_betaselect]{vcov.lm_betaselect()}} for details.) +(See \code{\link[=vcov.lm_betaselect]{vcov.lm_betaselect()}} +and \code{\link[=vcov.glm_betaselect]{vcov.glm_betaselect()}} +for details.) \item A \code{confint}-method for forming the confidence intervals of the estimates of the coefficients. If bootstrapping is requested, it can return the bootstrap confidence intervals. -(See \code{\link[=confint.lm_betaselect]{confint.lm_betaselect()}} for details.) +(See \code{\link[=confint.lm_betaselect]{confint.lm_betaselect()}} and +\code{\link[=confint.glm_betaselect]{confint.glm_betaselect()}} +for details.) \item A \code{summary}-method for printing the summary of the results, with additional information such as the number of bootstrap samples and which variables have been standardized. -(See \code{\link[=summary.lm_betaselect]{summary.lm_betaselect()}} for details.) +(See \code{\link[=summary.lm_betaselect]{summary.lm_betaselect()}} and +\code{\link[=summary.glm_betaselect]{summary.glm_betaselect()}} +for details.) \item An \code{anova}-method for printing the ANOVA table. Can also be used to compare two or more outputs of -\code{\link[=lm_betaselect]{lm_betaselect()}}. -(See \code{\link[=anova.lm_betaselect]{anova.lm_betaselect()}} for details.) +\code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}} +(See \code{\link[=anova.glm_betaselect]{anova.glm_betaselect()}} +and \code{\link[=anova.glm_betaselect]{anova.glm_betaselect()}} +for details.) \item A \code{predict}-method for computing predicted values. It can be used to compute the predicted values given @@ -254,43 +314,53 @@ a set of new unstandardized data. The data will be standardized before computing the predicted values in the models with standardization. -(See \code{\link[=predict.lm_betaselect]{predict.lm_betaselect()}} for details.) +(See \code{\link[=predict.lm_betaselect]{predict.lm_betaselect()}} and +\code{\link[=predict.glm_betaselect]{predict.glm_betaselect()}} +for details.) \item The default \code{update}-method for updating a call also works for an -\code{lm_betaselect} object. It can +\code{lm_betaselect} object or +a \code{glm_betaselect()} object. It can update the model in the same way it updates a model fitted by -\code{\link[stats:lm]{stats::lm()}}, and also update -arguments of \code{\link[=lm_betaselect]{lm_betaselect()}}, +\code{\link[stats:lm]{stats::lm()}} or \code{\link[stats:glm]{stats::glm()}}, +and also update +the arguments of \code{\link[=lm_betaselect]{lm_betaselect()}} +or \code{\link[=glm_betaselect]{glm_betaselect()}} such as the variables to be standardized. (See \code{\link[stats:update]{stats::update()}} for details.) } Most other methods for the output -of \code{\link[stats:lm]{stats::lm()}} should also work -on an \code{lm_betaselect}-class object. +of \code{\link[stats:lm]{stats::lm()}} and \code{\link[stats:glm]{stats::glm()}} +should also work +on an \code{lm_betaselect}-class object +or a \code{glm_betaselect}-class object, +respectively. Some of them will give the same results regardless of the variables -standardized. For example, +standardized. Examples are \code{\link[=rstandard]{rstandard()}} and \code{\link[=cooks.distance]{cooks.distance()}}. For some others, they should be used with cautions if they make use of the variance-covariance matrix -of the estimates because they may use -the least square version. +of the estimates. To use the methods for \code{lm} objects +or \code{glm} objects on the results without standardization, simply use \code{\link[=raw_output]{raw_output()}}. For example, to get the fitted values without standardization, call \code{fitted(raw_output(x))}, where \code{x} -is the output of \code{\link[=lm_betaselect]{lm_betaselect()}}. +is the output of \code{\link[=lm_betaselect]{lm_betaselect()}} +or \code{\link[=glm_betaselect]{glm_betaselect()}}. } The function \code{\link[=raw_output]{raw_output()}} simply extracts the regression output by \code{\link[stats:lm]{stats::lm()}} +or \code{\link[stats:glm]{stats::glm()}} on the variables without standardization. } \examples{ @@ -323,6 +393,43 @@ lm_beta_all <- lm_betaselect(dv ~ iv*mod + cov1 + cat1, summary(lm_beta_all) +data(data_test_mod_cat) + +data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, + yes = 1, + no = 0) +# Standardize only iv +logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + family = binomial, + data = data_test_mod_cat, + to_standardize = "iv") +summary(logistic_beta_x) + +logistic_beta_x +summary(logistic_beta_x) + +# Manually standardize iv and call glm() + +data_test_mod_cat$iv_z <- scale(data_test_mod_cat[, "iv"])[, 1] + +logistic_beta_x_manual <- glm(p ~ iv_z*mod + cov1 + cat1, + family = binomial, + data = data_test_mod_cat) + +coef(logistic_beta_x) +coef(logistic_beta_x_manual) + +# Standardize all numeric predictors + +logistic_beta_allx <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + family = binomial, + data = data_test_mod_cat, + to_standardize = c("iv", "mod", "cov1")) +# Note that cat1 is not standardized +summary(logistic_beta_allx) + + summary(raw_output(lm_beta_x)) } @@ -347,7 +454,9 @@ intervals for standardized regression coefficients. \doi{10.1037/a0033269} } \seealso{ -\code{\link[=print.lav_betaselect]{print.lav_betaselect()}} for its print method. +\code{\link[=print.lm_betaselect]{print.lm_betaselect()}} and +\code{\link[=print.glm_betaselect]{print.glm_betaselect()}} for the +\code{print}-methods. } \author{ Shu Fai Cheung \url{https://orcid.org/0000-0002-9871-9448} diff --git a/man/predict.glm_betaselect.Rd b/man/predict.glm_betaselect.Rd new file mode 100644 index 0000000..03222db --- /dev/null +++ b/man/predict.glm_betaselect.Rd @@ -0,0 +1,91 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lm_betaselect_methods.R +\name{predict.glm_betaselect} +\alias{predict.glm_betaselect} +\title{Predict Method for a 'glm_betaselect' Object} +\usage{ +\method{predict}{glm_betaselect}( + object, + model_type = c("beta", "standardized", "raw", "unstandardized"), + newdata, + ... +) +} +\arguments{ +\item{object}{A \code{glm_betaselect}-class +object.} + +\item{model_type}{The model from which the +the predicted values are computed. +For +\code{"beta"} or \code{"standardized"}, the +model is the one after selected +variables standardized. For \code{"raw"} +or \code{"unstandardized"}, the model is +the one before standardization was +done.} + +\item{newdata}{If set to a data +frame, the predicted values are +computed using this data frame. +The data must be unstandardized. +That is, the variables are of the +same units as in the data frame +used in \code{\link[=glm_betaselect]{glm_betaselect()}}. If +\code{model_type} is \code{"beta"} or +\code{"standardized"}, it will be +standardized using the setting +of \code{to_standardize} when \code{object} +is created in \code{\link[=glm_betaselect]{glm_betaselect()}}.} + +\item{...}{Arguments +to be passed to \code{\link[stats:predict.glm]{stats::predict.glm()}}. +Please refer to the help page of +\code{\link[stats:predict.glm]{stats::predict.glm()}}.} +} +\value{ +It returns the output of \code{\link[stats:predict.glm]{stats::predict.glm()}}. +} +\description{ +Compute the predicted +values in a model fitted by +\code{\link[=glm_betaselect]{glm_betaselect()}}. +} +\details{ +It simply passes the model \emph{before} +or \emph{after} selected variables +are standardized to the +\code{predict}-method of a \code{glm} object. +\subsection{IMPORTANT}{ + +Some statistics, such as prediction +or confidence interval, which make use +of the sampling variances and +covariances of coefficient estimates +\emph{may} not be applicable to the +models with one or more variables +standardized. Therefore, they should +only be used for exploratory purpose. +} +} +\examples{ + +data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, + yes = 1, + no = 0) +logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + data = data_test_mod_cat, + family = binomial, + to_standardize = "iv") + +predict(logistic_beta_x) +predict(logistic_beta_x, model_type = "raw") + +} +\seealso{ +\code{\link[=glm_betaselect]{glm_betaselect()}} and \code{\link[stats:predict.glm]{stats::predict.glm()}} +} +\author{ +Shu Fai Cheung \url{https://orcid.org/0000-0002-9871-9448} +} diff --git a/man/summary.glm_betaselect.Rd b/man/summary.glm_betaselect.Rd new file mode 100644 index 0000000..560751b --- /dev/null +++ b/man/summary.glm_betaselect.Rd @@ -0,0 +1,234 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lm_betaselect_methods.R +\name{summary.glm_betaselect} +\alias{summary.glm_betaselect} +\alias{print.summary.glm_betaselect} +\title{Summary of an +'glm_betaselect'-Class Object} +\usage{ +\method{summary}{glm_betaselect}( + object, + dispersion = NULL, + correlation = FALSE, + symbolic.cor = FALSE, + trace = FALSE, + test = c("LRT", "Rao"), + se_method = c("boot", "bootstrap", "z", "glm", "default"), + ci = FALSE, + level = 0.95, + boot_type = c("perc", "bc"), + boot_pvalue_type = c("asymmetric", "norm"), + type = c("beta", "standardized", "raw", "unstandardized"), + ... +) + +\method{print}{summary.glm_betaselect}( + x, + est_digits = 3, + symbolic.cor = x$symbolic.cor, + signif.stars = getOption("show.signif.stars"), + show.residuals = FALSE, + z_digits = 3, + pvalue_less_than = 0.001, + ... +) +} +\arguments{ +\item{object}{The output of +\code{\link[=glm_betaselect]{glm_betaselect()}}.} + +\item{dispersion}{The dispersion +parameter. If \code{NULL}, then it is +extracted from the object. If +a scalar, it will be used as +the dispersion parameter. See +\code{\link[stats:summary.glm]{stats::summary.glm()}} for details.} + +\item{correlation}{If \code{TRUE}, the +correlation matrix of the estimates +will be returned. The same argument +in \code{\link[stats:summary.glm]{stats::summary.glm()}}. Default +is \code{FALSE}.} + +\item{symbolic.cor}{If \code{TRUE}, +correlations are printed in symbolic +form as in \code{\link[stats:summary.glm]{stats::summary.glm()}}. +Default is \code{FALSE}.} + +\item{trace}{Logical. Whether +profiling will be traced when forming +the confidence interval if +\code{se_method} is \code{"default"}, \code{"z"}, or +\code{"glm"}. Ignored if \code{ci} is \code{FALSE}. +See \code{\link[stats:confint]{stats::confint.glm()}} for +details.} + +\item{test}{The test used for +\code{se_method} is \code{"default"}, \code{"z"}, or +\code{"glm"}. Ignored if \code{ci} is \code{FALSE}. +See \code{\link[stats:confint]{stats::confint.glm()}} for +details.} + +\item{se_method}{The method used to +compute the standard errors and +confidence intervals (if requested). +If bootstrapping was +requested when calling +\code{\link[=glm_betaselect]{glm_betaselect()}} and this argument +is set to \code{"bootstrap"} or \code{"boot"}, +the bootstrap standard errors are +returned. If bootstrapping +was not requested or if this argument +is set to \code{"z"}, \code{"glm"}, or \code{"default"}, +then the usual \code{glm} +standard errors are +returned, with a warning raised +unless \code{type} is \code{"raw"} or +\verb{"unstandardized".} +Default is \code{"boot"}.} + +\item{ci}{Logical. Whether +confidence intervals are computed. +Default is \code{FALSE.}} + +\item{level}{The level of confidence, +default is .95, returning the 95\% +confidence interval.} + +\item{boot_type}{The type of +bootstrap confidence intervals, +if requested. +Currently, it supports \code{"perc"}, +percentile bootstrap confidence +intervals, and \code{"bc"}, bias-corrected +bootstrap confidence interval.} + +\item{boot_pvalue_type}{The type +of \emph{p}-values if \code{se_method} is +\code{"boot"} or \code{"bootstrap"}. If \code{"norm"}, +then the \emph{z} score is used to compute +the \emph{p}-value using a +standard normal distribution. +If \code{"asymmetric"}, the default, then +the method presented in +Asparouhov and Muthén (2021) is used +to compute the \emph{p}-value based on the +bootstrap distribution.} + +\item{type}{String. If +\code{"unstandardized"} or \code{"raw"}, the +output \emph{before} standardization +are used If \code{"beta"} or +\code{"standardized"}, then the +output \emph{after} selected +variables standardized are returned. +Default is \code{"beta"}.} + +\item{...}{Additional arguments +passed to other methods.} + +\item{x}{The output of +\code{\link[=summary.glm_betaselect]{summary.glm_betaselect()}}.} + +\item{est_digits}{The number of +digits after the decimal to be +displayed for the coefficient +estimates, their standard errors, and +confidence intervals (if present). +Note that the values will be rounded +to this number of digits before +printing. If all digits at this +position are zero for all values, the +values may be displayed with fewer +digits. Note that the coefficient +table is printed by +\code{\link[stats:printCoefmat]{stats::printCoefmat()}}. If some +numbers are vary large, the number of +digits after the decimal may be +smaller than \code{est_digits} due to a +limit on the column width.} + +\item{signif.stars}{Whether "stars" +(asterisks) are printed to denote +the level of significance achieved +for each coefficient. Default is +\code{TRUE}.} + +\item{show.residuals}{If \code{TRUE}, +a summary of the deviance residuals +will be printed. Default is \code{FALSE}.} + +\item{z_digits}{The number of digits +after the decimal to be displayed for +the \emph{z} or similar statistic (in the +column \code{"z value"}).} + +\item{pvalue_less_than}{If a +\emph{p}-value is less than this value, it +will be displayed with \verb{"<(this value)".} For example, if +\code{pvalue_less_than} is .001, the +default, \emph{p}-values less than .001 +will be displayed as \verb{<.001}. This +value also determines the printout of +the \emph{p}-value of the \emph{F} statistic. +(This argument does what \code{eps.Pvalue} +does in \code{\link[stats:printCoefmat]{stats::printCoefmat()}}.)} +} +\value{ +It returns an object of class +\code{summary.glm_betaselect}, which is +similar to the output of +\code{\link[stats:summary.glm]{stats::summary.glm()}}, with additional +information on the standardization +and bootstrapping, if requested. + +The \code{print}-method of +\code{summary.glm_betaselect} is called +for its side effect. The object \code{x} +is returned invisibly. +} +\description{ +The \code{summary} method +for \code{glm_betaselect}-class objects. +} +\details{ +By default, it returns a +\code{summary.glm_betaselect}-class object +for the results with selected variables +standardized. By setting \code{type} to +\code{"raw"} or \code{"unstandardized"}, it +returns the summary for the results +\emph{before} standardization. + +The \code{print} method of +\code{summary.glm_betaselect}-class objects +is adapted from +\code{\link[stdmod:print.summary.std_selected]{stdmod::print.summary.std_selected()}}. +} +\examples{ + +data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, + yes = 1, + no = 0) +# bootstrap should be set to 2000 or 5000 in real studies +logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + data = data_test_mod_cat, + family = binomial, + to_standardize = "iv", + do_boot = TRUE, + bootstrap = 100, + iseed = 1234) +summary(logistic_beta_x) + +} +\references{ +Asparouhov, A., & Muthén, B. (2021). Bootstrap p-value computation. +Retrieved from https://www.statmodel.com/download/FAQ-Bootstrap\%20-\%20Pvalue.pdf +} +\seealso{ +\code{\link[=glm_betaselect]{glm_betaselect()}} +} +\author{ +Shu Fai Cheung \url{https://orcid.org/0000-0002-9871-9448} +} diff --git a/man/vcov.lm_betaselect.Rd b/man/vcov.lm_betaselect.Rd index 8b15252..1d5f283 100644 --- a/man/vcov.lm_betaselect.Rd +++ b/man/vcov.lm_betaselect.Rd @@ -2,12 +2,22 @@ % Please edit documentation in R/lm_betaselect_methods.R \name{vcov.lm_betaselect} \alias{vcov.lm_betaselect} -\title{The 'vcov' Method for an -'lm_betaselect'-Class Object} +\alias{vcov.glm_betaselect} +\title{The 'vcov' Method for +'lm_betaselect' and \code{glm_betaselect} +Objects} \usage{ \method{vcov}{lm_betaselect}( object, - method = c("boot", "bootstrap", "ls"), + method = c("boot", "bootstrap", "ls", "default"), + type = c("beta", "standardized", "raw", "unstandardized"), + warn = TRUE, + ... +) + +\method{vcov}{glm_betaselect}( + object, + method = c("boot", "bootstrap", "ls", "default"), type = c("beta", "standardized", "raw", "unstandardized"), warn = TRUE, ... @@ -16,18 +26,22 @@ \arguments{ \item{object}{The output of \code{\link[=lm_betaselect]{lm_betaselect()}} -or an \code{lm_betaselect}-class object.} +or an \code{lm_betaselect}-class object, +or the output of \code{\link[=glm_betaselect]{glm_betaselect()}} +or a \code{glm_betaselect}-class object.} \item{method}{The method used to compute the variance-covariance matrix. If bootstrapping was requested when calling -\code{\link[=lm_betaselect]{lm_betaselect()}} and this argument +\code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}} and this argument is set to \code{"bootstrap"} or \code{"boot"}, the bootstrap variance-covariance matrix is returned. If bootstrapping was not requested or if this argument -is set to \code{"ls"}, then the usual \code{lm} +is set to \code{"ls"} or \code{"default"}, +then the usual \code{lm} or \code{glm} variance-covariance matrix is returned, with a warning raised unless \code{type} is \code{"raw"} or @@ -65,7 +79,8 @@ estimates. Compute the variance-covariance matrix of estimates in the output of -\code{\link[=lm_betaselect]{lm_betaselect()}}. +\code{\link[=lm_betaselect]{lm_betaselect()}} or +\code{\link[=glm_betaselect]{glm_betaselect()}}. } \details{ The type of @@ -74,7 +89,7 @@ on the object. If bootstrapping was requested, by default it returns the bootstrap variance-covariance matrix. Otherwise, it returns the -OLS (or WLS) variance-covariance +default variance-covariance matrix and raises a warning. Support for other type of @@ -93,10 +108,33 @@ lm_beta_x <- lm_betaselect(dv ~ iv*mod + cov1 + cat1, bootstrap = 100, iseed = 1234) vcov(lm_beta_x) +# A warning is expected for the following call vcov(lm_beta_x, method = "ls") vcov(lm_beta_x, type = "raw") + +data_test_mod_cat$p <- scale(data_test_mod_cat$dv)[, 1] +data_test_mod_cat$p <- ifelse(data_test_mod_cat$p > 0, + yes = 1, + no = 0) +# bootstrap should be set to 2000 or 5000 in real studies +logistic_beta_x <- glm_betaselect(p ~ iv*mod + cov1 + cat1, + data = data_test_mod_cat, + family = binomial, + to_standardize = "iv", + do_boot = TRUE, + bootstrap = 100, + iseed = 1234) +vcov(logistic_beta_x) +# A warning is expected for the following call +vcov(logistic_beta_x, method = "default") +vcov(logistic_beta_x, type = "raw") + +} +\seealso{ +\code{\link[=lm_betaselect]{lm_betaselect()}} and +\code{\link[=glm_betaselect]{glm_betaselect()}} } \author{ Shu Fai Cheung \url{https://orcid.org/0000-0002-9871-9448} diff --git a/tests/testthat/test_glm_betaselect.R b/tests/testthat/test_glm_betaselect.R new file mode 100644 index 0000000..397889a --- /dev/null +++ b/tests/testthat/test_glm_betaselect.R @@ -0,0 +1,74 @@ +# Adapted from stdmod + +library(testthat) + +dat <- data_test_mod_cat + +transform0 <- function(data, vars) { + for (x in vars) { + data[x] <- scale(data[[x]])[, 1] + } + data + } + +dat$dv <- ifelse(dat$dv > mean(dat$dv), + yes = 1, + no = 0) + +lm_raw <- glm(dv ~ iv*mod + cov1 + cat1, dat, family = binomial) +lm_zx <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("iv")), family = binomial) +lm_zw <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("mod")), family = binomial) +lm_zxzw <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("iv", "mod")), family = binomial) +lm_zall <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("iv", "mod", "cov1")), family = binomial) +lm_inline <- glm(dv ~ I(iv^2)*mod + I(1 / cov1) + cat1, transform0(dat, c("iv", "mod", "cov1")), family = binomial) + +dat_tmp <- dat +dat_tmp$iv <- scale(dat$iv, scale = FALSE, center = TRUE)[, 1] +dat_tmp$mod <- scale(dat$mod, scale = sd(dat$mod), center = FALSE)[, 1] + +lm_beta_x <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_w <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, to_standardize = "mod", do_boot = FALSE, family = binomial) +lm_beta_xw <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, to_standardize = c("mod", "iv"), do_boot = FALSE, family = binomial) +lm_beta_inline <- glm_betaselect(dv ~ I(iv^2)*mod + I(1/ cov1) + cat1, dat, not_to_standardize = "dv", do_boot = FALSE, family = binomial) + +test_that("Standardize x", { + expect_equal( + coef(lm_beta_x), coef(lm_zx), + ignore_attr = TRUE + ) + }) + +test_that("Standardize w", { + expect_equal( + coef(lm_beta_w), coef(lm_zw), + ignore_attr = TRUE + ) + }) + +test_that("Standardize xw", { + expect_equal( + coef(lm_beta_xw), coef(lm_zxzw), + ignore_attr = TRUE + ) + }) + +test_that("Inline terms", { + expect_equal( + coef(lm_beta_inline), coef(lm_inline), + ignore_attr = TRUE + ) + }) + +test_that("print.glm_betaselect", { + expect_output(print(lm_beta_x), + "Variable(s) standardized: iv", fixed = TRUE) + expect_output(print(lm_beta_x), + "betaselectr::std_data", fixed = TRUE) + expect_output(print(lm_beta_x, type = "raw"), + "data = dat", fixed = TRUE) + }) + +test_that("raw_output", { + expect_identical(coef(raw_output(lm_beta_xw)), + coef(lm_raw)) + }) diff --git a/tests/testthat/test_glm_betaselect_boot.R b/tests/testthat/test_glm_betaselect_boot.R new file mode 100644 index 0000000..a27cb1a --- /dev/null +++ b/tests/testthat/test_glm_betaselect_boot.R @@ -0,0 +1,69 @@ +skip_on_cran() +# A long test with parallel + +# Adapted from stdmod + +library(testthat) + +dat <- data_test_mod_cat + +transform0 <- function(data, vars) { + for (x in vars) { + data[x] <- scale(data[[x]])[, 1] + } + data + } + +dat$dv <- ifelse(dat$dv > mean(dat$dv), + yes = 1, + no = 0) + +lm_raw <- glm(dv ~ iv*mod + cov1 + cat1, dat, family = binomial) +lm_zx <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("iv")), family = binomial) +lm_zw <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("mod")), family = binomial) +lm_zxzw <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("iv", "mod")), family = binomial) +lm_zall <- glm(dv ~ iv*mod + cov1 + cat1, transform0(dat, c("iv", "mod", "cov1")), family = binomial) +lm_inline <- glm(dv ~ I(iv^2)*mod + I(1 / cov1) + cat1, transform0(dat, c("iv", "mod", "cov1")), family = binomial) + +data_test_mod_cat$dv <- ifelse(data_test_mod_cat$dv > mean(data_test_mod_cat$dv), + yes = 1, + no = 0) +dat_tmp <- data_test_mod_cat +dat_tmp$dv <- ifelse(dat_tmp$dv > mean(dat_tmp$dv), + yes = 1, + no = 0) +n <- nrow(dat_tmp) +set.seed(5678) +i <- replicate(6, sample(n, size = n, replace = TRUE), simplify = FALSE) +dat_tmp <- dat_tmp[i[[5]], ] + +lm_beta_x <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data_test_mod_cat, to_standardize = "iv", do_boot = TRUE, bootstrap = 6, iseed = 5678, progress = FALSE, family = binomial) +lm_beta_w <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data_test_mod_cat, to_standardize = "mod", do_boot = TRUE, bootstrap = 6, iseed = 5678, progress = FALSE, family = binomial) +lm_beta_xw <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data_test_mod_cat, to_standardize = c("mod", "iv"), do_boot = TRUE, bootstrap = 6, iseed = 5678, progress = FALSE, family = binomial) + +test_that("Standardize x", { + tmp1 <- lm_beta_x$lm_betaselect$boot_out[[5]]$coef_std + tmp2 <- coef(update(lm_zx, data = transform0(dat_tmp, c("iv")))) + expect_equal( + tmp1, tmp2, + ignore_attr = TRUE + ) + }) + +test_that("Standardize w", { + tmp1 <- lm_beta_w$lm_betaselect$boot_out[[5]]$coef_std + tmp2 <- coef(update(lm_zw, data = transform0(dat_tmp, c("mod")))) + expect_equal( + tmp1, tmp2, + ignore_attr = TRUE + ) + }) + +test_that("Standardize xw", { + tmp1 <- lm_beta_xw$lm_betaselect$boot_out[[5]]$coef_std + tmp2 <- coef(update(lm_zxzw, data = transform0(dat_tmp, c("iv", "mod")))) + expect_equal( + tmp1, tmp2, + ignore_attr = TRUE + ) + }) diff --git a/tests/testthat/test_glm_betaselect_methods.R b/tests/testthat/test_glm_betaselect_methods.R new file mode 100644 index 0000000..80d72e0 --- /dev/null +++ b/tests/testthat/test_glm_betaselect_methods.R @@ -0,0 +1,292 @@ +skip("WIP") + +# Adapted from stdmod + +library(testthat) +library(boot) + +data(data_test_mod_cat) +data_test_mod_cat$dv <- ifelse(data_test_mod_cat$dv > mean(data_test_mod_cat$dv), + yes = 1, + no = 0) + +dat <- data_test_mod_cat + +transform0 <- function(data, vars) { + for (x in vars) { + data[x] <- scale(data[[x]])[, 1] + } + data + } + + +lm_raw <- glm(dv ~ iv*mod + cov1 + cat1, data_test_mod_cat, family = binomial) +lm_raw2 <- glm(dv ~ iv + mod + cov1 + cat1, data_test_mod_cat, family = binomial) +lm_zx <- glm(dv ~ iv*mod + cov1 + cat1, transform0(data_test_mod_cat, c("iv")), family = binomial) +lm_zw <- glm(dv ~ iv*mod + cov1 + cat1, transform0(data_test_mod_cat, c("mod")), family = binomial) +lm_zxzw <- glm(dv ~ iv*mod + cov1 + cat1, transform0(data_test_mod_cat, c("iv", "mod")), family = binomial) +lm_zall <- glm(dv ~ iv*mod + cov1 + cat1, transform0(data_test_mod_cat, c("iv", "mod", "cov1")), family = binomial) +lm_inline <- glm(dv ~ I(iv^2)*mod + I(1 / cov1) + cat1, transform0(data_test_mod_cat, c("iv", "mod", "cov1")), family = binomial) +lm_inline_raw <- glm(dv ~ I(iv^2)*mod + I(1 / cov1) + cat1, data_test_mod_cat, family = binomial) + +dat_tmp <- data_test_mod_cat +# dat_tmp$iv <- scale(dat$iv, scale = FALSE, center = TRUE)[, 1] +# dat_tmp$mod <- scale(dat$mod, scale = sd(dat$mod), center = FALSE)[, 1] +dat_tmp$iv <- scale(dat$iv)[, 1] +lm_raw_x <- glm(dv ~ iv*mod + cov1 + cat1, dat_tmp, family = binomial) + +lm_beta_x <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_x2 <- glm_betaselect(dv ~ iv + mod + cov1 + cat1, dat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_w <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, to_standardize = "mod", do_boot = FALSE, family = binomial) +lm_beta_xw <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, to_standardize = c("mod", "iv"), do_boot = FALSE, family = binomial) +lm_beta_inline <- glm_betaselect(dv ~ I(iv^2)*mod + I(1/ cov1) + cat1, dat, not_to_standardize = "dv", do_boot = FALSE, family = binomial) +lm_beta_xyw_boot <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, dat, not_to_standardize = "dv", do_boot = TRUE, bootstrap = 100, iseed = 1234, progress = FALSE, family = binomial) + +set.seed(1234) +n <- nrow(data_test_mod_cat) +i <- replicate(100, sample(n, size = n, replace = TRUE), simplify = FALSE) +tmp <- sapply(i, function(xx) { + coef(glm(dv ~ iv*mod + cov1 + cat1, dat[xx, ], family = binomial)) + }) +vcov_raw_chk <- cov(t(tmp)) +set.seed(1234) +lm_raw_boot <- boot(dat, + function(d, i) { + coef(glm(dv ~ iv*mod + cov1 + cat1, d[i, ], family = binomial)) + }, + R = 100, + simple = TRUE) + +test_that("coef", { + expect_identical(lm_beta_x$coefficients, + coef(lm_beta_x)) + expect_identical(lm_raw$coefficients, + coef(lm_beta_x, type = "raw")) + expect_identical(lm_beta_xw$coefficients, + coef(lm_beta_xw)) + expect_identical(lm_raw$coefficients, + coef(lm_beta_xw, type = "raw")) + expect_identical(lm_beta_inline$coefficients, + coef(lm_beta_inline)) + expect_identical(lm_inline_raw$coefficients, + coef(lm_beta_inline, type = "raw")) + }) + +test_that("vcov", { + expect_warning(expect_warning(vcov(lm_beta_x), "changed"), + "should not") + expect_warning(vcov(lm_beta_x, method = "default")) + expect_equal(vcov(lm_beta_xyw_boot, method = "boot"), + vcov(lm_beta_xyw_boot)) + expect_warning(vcov(lm_beta_xyw_boot, method = "default"),) + expect_equal(vcov(lm_beta_xyw_boot, method = "default", type = "raw"), + vcov(lm_raw)) + expect_equal(vcov(lm_beta_xyw_boot, method = "boot", type = "raw"), + vcov_raw_chk) + }) + +test_that("confint", { + expect_warning(expect_warning(suppressMessages(confint(lm_beta_x)), "changed"), + "should not") + expect_warning(suppressMessages(confint(lm_beta_x, method = "ls"))) + expect_equal(confint(lm_beta_xyw_boot, method = "boot", level = .80, + parm = c("(Intercept)", "cat1gp2")), + confint(lm_beta_xyw_boot, level = .80, + parm = c("(Intercept)", "cat1gp2"))) + expect_warning(suppressMessages(confint(lm_beta_xyw_boot, method = "ls"))) + expect_equal(suppressMessages(confint(lm_beta_xyw_boot, method = "ls", type = "raw", + level = .75, parm = "iv")), + suppressMessages(confint(lm_raw, level = .75, parm = "iv"))) + expect_equal(as.vector(confint(lm_beta_xyw_boot, method = "boot", type = "raw", parm = "mod", level = .90)), + boot.ci(lm_raw_boot, type = "perc", index = 3, conf = .90)$perc[4:5]) + }) + +test_that("anova", { + expect_equal(anova(lm_beta_x), + anova(lm_raw)) + expect_equal(anova(lm_beta_x, lm_beta_x2), + anova(lm_raw, lm_raw2)) + expect_equal(anova(lm_beta_xw, type = "raw"), + anova(lm_raw)) + expect_equal(anova(lm_beta_x, lm_beta_x2, type = "raw"), + anova(lm_raw, lm_raw2)) + }) + +test_that("summary", { + lm_beta_x_lm <- lm_beta_x + class(lm_beta_x_lm) <- "glm" + expect_warning(summary(lm_beta_x), + "changed") + expect_equal(summary(lm_beta_x, type = "raw", se_method = "default")$coefficients, + summary(lm_raw)$coefficients) + expect_equal(summary(lm_beta_x, type = "beta", se_method = "default")$coefficients, + summary(lm_beta_x_lm)$coefficients) + expect_no_error(summary(lm_beta_xyw_boot)) + expect_equal(summary(lm_beta_xyw_boot, ci = TRUE, level = .90)$coefficients[, 2:3], + confint(lm_beta_xyw_boot, level = .90), + ignore_attr = TRUE) + expect_equal(suppressMessages(summary(lm_beta_xyw_boot, ci = TRUE, se_method = "default", level = .90)$coefficients[, 2:3]), + suppressMessages(confint(lm_beta_xyw_boot, level = .90, method = "default", warn = FALSE)), + ignore_attr = TRUE) + }) + +test_that("print.summary", { + expect_output(print(summary(lm_beta_x, type = "raw", se_method = "default")), + "*before*", fixed = TRUE) + expect_output(print(summary(lm_beta_x, type = "beta", se_method = "default")), + "should not be used", fixed = TRUE) + expect_output(print(summary(lm_beta_xyw_boot)), + "asymmetric", fixed = TRUE) + expect_output(print(summary(lm_beta_xyw_boot, ci = TRUE, level = .90)), + "Percentile", fixed = TRUE) + expect_output(print(suppressMessages(summary(lm_beta_xyw_boot, ci = TRUE, se_method = "default", level = .90))), + "should not be used") + }) + +test_that("logLik", { + expect_equal(logLik(lm_beta_x), + logLik(raw_output(lm_beta_x))) + expect_equal(logLik(lm_beta_w), + logLik(lm_raw)) + expect_equal(logLik(raw_output(lm_beta_w)), + logLik(lm_raw)) + }) + +test_that("extractAIC", { + expect_equal(extractAIC(lm_beta_w), + extractAIC(lm_raw)) + expect_equal(extractAIC(lm_beta_x), + extractAIC(lm_raw)) + expect_equal(extractAIC(raw_output(lm_beta_x)), + extractAIC(lm_raw)) + }) + +test_that("deviance", { + expect_equal(deviance(lm_beta_x), + deviance(lm_raw)) + expect_equal(deviance(lm_beta_w), + deviance(lm_raw)) + expect_equal(deviance(raw_output(lm_beta_xw)), + deviance(lm_raw)) + }) + +test_that("fitted", { + expect_equal(fitted(lm_beta_x), + fitted(lm_raw)) + expect_equal(fitted(lm_beta_w), + fitted(lm_raw)) + expect_equal(fitted(raw_output(lm_beta_xw)), + fitted(lm_raw)) + }) + +test_that("plot.lm", { + skip("To be tested in an interactive section") + # Should be tested in an interactive session + plot(lm_beta_x, which = 1) + plot(raw_output(lm_beta_x), which = 1) + }) + +test_that("predict", { + expect_equal(predict(lm_beta_xw, model_type = "raw"), + predict(lm_raw)) + dat_tmp3 <- data_test_mod_cat[10:20, ] + dat_tmp3$iv <- scale(dat_tmp3$iv)[, 1] + expect_equal(predict(lm_beta_x, newdata = data_test_mod_cat[10:20, ]), + predict(lm_raw_x, newdata = dat_tmp3)) + }) + +# add1 + +lm_raw_0 <- glm(dv ~ iv + mod, data_test_mod_cat, family = binomial) +lm_raw_1a <- glm(dv ~ iv + mod + cov1, data_test_mod_cat, family = binomial) +lm_raw_1b <- glm(dv ~ iv + mod + cat1, data_test_mod_cat, family = binomial) +add1(lm_raw_0, ~ . + cov1 + cat1) +extractAIC(lm_raw_1a) +extractAIC(lm_raw_1b) +# anova(lm_raw_1a)["Residuals", "Sum Sq"] +# anova(lm_raw_1b)["Residuals", "Sum Sq"] +drop1(lm_raw_1b) +drop1(lm_raw_1a, ~ cov1) +extractAIC(lm_raw_0) +# anova(lm_raw_0)["Residuals", "Sum Sq"] + +dat_tmp2 <- data_test_mod_cat +dat_tmp2$iv <- scale(dat_tmp2$iv)[, 1] +dat_tmp2$cov1 <- scale(dat_tmp2$cov1)[, 1] +# dat_tmp2$dv <- scale(dat_tmp2$dv)[, 1] +lm_beta_manual_0 <- glm(dv ~ iv + mod, dat_tmp2, family = binomial) +lm_beta_manual_1a <- glm(dv ~ iv + mod + cov1, dat_tmp2, family = binomial) +lm_beta_manual_1b <- glm(dv ~ iv + mod + cat1, dat_tmp2, family = binomial) +add1(lm_beta_manual_0, ~ . + cov1 + cat1) +extractAIC(lm_beta_manual_1a) +extractAIC(lm_beta_manual_1b) +# anova(lm_beta_manual_1a)["Residuals", "Sum Sq"] +# anova(lm_beta_manual_1b)["Residuals", "Sum Sq"] +drop1(lm_beta_manual_1b) +drop1(lm_beta_manual_1a, ~ cov1) +extractAIC(lm_beta_manual_0) +# anova(lm_beta_manual_0)["(lm_beta_manual_0siduals", "Sum Sq"] + +test_that("add1() and drop1()", { + lm_beta_0 <- glm_betaselect(dv ~ iv + mod, data_test_mod_cat, to_standardize = c("iv", "cov1"), progress = FALSE, do_boot = FALSE, family = binomial) + lm_beta_1a <- glm_betaselect(dv ~ iv + mod + cov1, data_test_mod_cat, to_standardize = c("iv", "cov1"), progress = FALSE, do_boot = FALSE, family = binomial) + lm_beta_1b <- glm_betaselect(dv ~ iv + mod + cat1, data_test_mod_cat, to_standardize = c("iv", "cov1"), progress = FALSE, do_boot = FALSE, family = binomial) + add1_out <- add1(lm_beta_0, ~ . + cov1 + cat1) + expect_equal(add1_out["cov1", "AIC"], + extractAIC(lm_beta_manual_1a)[2]) + expect_equal(add1_out["cat1", "AIC"], + extractAIC(lm_beta_manual_1b)[2]) + expect_equal(add1_out["cov1", "RSS"], + anova(lm_beta_manual_1a)["Residuals", "Sum Sq"]) + expect_equal(add1_out["cat1", "RSS"], + anova(lm_beta_manual_1b)["Residuals", "Sum Sq"]) + drop1_out1b <- drop1(lm_beta_1b) + drop1_out1a <- drop1(lm_beta_1a, ~ cov1) + expect_equal(drop1_out1b["cat1", "AIC"], + extractAIC(lm_beta_manual_0)[2]) + expect_equal(drop1_out1b["cat1", "RSS"], + anova(lm_beta_manual_0)["Residuals", "Sum Sq"]) + }) + +lm_beta_u0 <- glm_betaselect(dv ~ iv, data_test_mod_cat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_u1 <- glm_betaselect(dv ~ iv + mod + cov1, data_test_mod_cat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_u2 <- glm_betaselect(dv ~ iv*mod + cov1, data_test_mod_cat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_u3 <- glm_betaselect(dv ~ iv*mod + cov1 + cat1, data_test_mod_cat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_u4 <- glm_betaselect(dv ~ iv + mod + cov1 + cat1, data_test_mod_cat, to_standardize = "iv", do_boot = FALSE, family = binomial) +lm_beta_u5 <- glm_betaselect(dv ~ iv + mod + cov1, data_test_mod_cat, to_standardize = "mod", do_boot = FALSE, family = binomial) +lm_beta_u6 <- glm_betaselect(dv ~ iv + mod + cov1, data_test_mod_cat[20:50, ], to_standardize = "iv", do_boot = FALSE, family = binomial) + +test_that("getCall", { + expect_equal(as.character(getCall(lm_beta_u1)[[1]])[3], + "lm_betaselect") + expect_equal(as.character(getCall(lm_beta_u1, what = "beta")[[1]])[3], + "glm") + expect_equal(as.character(as.list(getCall(lm_beta_u1, what = "beta")$data)[[1]])[3], + "std_data") + expect_equal(as.character(getCall(lm_beta_u1, what = "raw")[[1]])[3], + "glm") + }) + +test_that("update", { + lm_beta_tmp <- update(lm_beta_u1, ~ . + cat1) + expect_equal(coef(lm_beta_tmp), + coef(lm_beta_u4)) + lm_beta_tmp <- update(lm_beta_u4, ~ . - cat1) + expect_equal(coef(lm_beta_tmp), + coef(lm_beta_u1)) + lm_beta_tmp <- update(lm_beta_u0, ~ . + mod + cov1 + cat1) + expect_equal(coef(lm_beta_tmp), + coef(lm_beta_u4)) + lm_beta_tmp <- update(lm_beta_u4, ~ . - cat1 - cov1 - mod) + expect_equal(coef(lm_beta_tmp), + coef(lm_beta_u0)) + lm_beta_tmp <- update(lm_beta_u1, ~ . + iv*mod) + expect_equal(sort(coef(lm_beta_tmp)), + sort(coef(lm_beta_u2))) + lm_beta_tmp <- update(lm_beta_u1, to_standardize = "mod") + expect_equal(sort(coef(lm_beta_tmp)), + sort(coef(lm_beta_u5))) + lm_beta_tmp <- update(lm_beta_u0, ~ . + mod + cov1, data = data_test_mod_cat[20:50, ]) + expect_equal(sort(coef(lm_beta_tmp)), + sort(coef(lm_beta_u6))) + }) diff --git a/tests/testthat/test_lm_betaselect_methods.R b/tests/testthat/test_lm_betaselect_methods.R index a34b59f..0a5701e 100644 --- a/tests/testthat/test_lm_betaselect_methods.R +++ b/tests/testthat/test_lm_betaselect_methods.R @@ -171,7 +171,7 @@ test_that("plot.lm", { skip("To be tested in an interactive section") # Should be tested in an interactive session plot(lm_beta_y) - plot(get_raqw(lm_beta_y)) + plot(raw_output(lm_beta_y)) }) test_that("predict", {