Merge pull request #8 from s3alfisc/dev

wildrwolf 0.6
s3alfisc · Mar 2, 2023 · 827d904 · 827d904
2 parents a8e2d68 + d3adb73
commit 827d904
Show file tree

Hide file tree

Showing 16 changed files with 231 additions and 194 deletions.
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -1,3 +1,3 @@
-Version: 0.4
-Date: 2023-01-31 21:17:12 UTC
-SHA: 330a5c75091bdc8379beee2152514daaa21838fa
+Version: 0.6.0
+Date: 2023-02-26 17:47:33 UTC
+SHA: 005d9844331395715354c0a92421d83071d03f8d
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,7 +2,7 @@ Package: wildrwolf
 Type: Package
 Title: Fast Computation of Romano-Wolf Corrected p-Values for
        Linear Regression Models
-Version: 0.5
+Version: 0.6.1
 Authors@R: 
     c(
     person(given = "Alexander",
@@ -14,7 +14,7 @@ Maintainer: Alexander Fischer <alexander-fischer1801@t-online.de>
 Description: Fast Routines to Compute Romano-Wolf corrected p-Values 
              (Romano and Wolf (2005a) <DOI:10.1198/016214504000000539>, 
              Romano and Wolf (2005b) <DOI:10.1111/j.1468-0262.2005.00615.x>) 
-             for objects of type "fixest" and "fixest_multi" from the "fixest" 
+             for objects of type 'fixest' and 'fixest_multi' from the 'fixest' 
              package via a wild (cluster) bootstrap.
 License: GPL (>= 3)
 Encoding: UTF-8

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+export(get_rwolf_pval)
 export(run_fwer_sim)
 export(rwolf)
 importFrom(MASS,mvrnorm)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,12 @@
+# wildrwolf 0.6.1 
+
+* Hot-Fix release to address ATLAS test failures. Failing 
+tests are disabled. 
+
+# wildrwolf 0.6.0 
+
+* Initial CRAN release. 
+
 # wildrwolf 0.2
 
 * add simulation function

diff --git a/R/get_rwolf_pval.R b/R/get_rwolf_pval.R
@@ -9,12 +9,16 @@ get_rwolf_pval <- function(t_stats, boot_t_stats){
   #' @param boot_t_stats A (B x S) matrix containing the 
   #'        bootstrapped t-statistics 
   #'        
-  #' @return A vector of Romano-Wolf corrected p-values        
+  #' @return A vector of Romano-Wolf corrected p-values     
+  #' @export    
 
   # note: this part very closely follows the p_adjust function from the hdm
   # package, written and maintained by Martin Spindler
   # code at https://github.com/cran/hdm/blob/master/R/p_adjust.R
 
+  t_stats <- abs(t_stats)
+  boot_t_stats <- abs(boot_t_stats)
+
   S <- ncol(boot_t_stats)
   B <- nrow(boot_t_stats)
 

diff --git a/R/rwolf.R b/R/rwolf.R
@@ -1,23 +1,22 @@
 #' Romano-Wolf multiple hypotheses adjusted p-values 
 #' 
 #' Function implements the Romano-Wolf multiple hypothesis correction procedure
-#' for objects of type fixest_multi (fixest_multi are objects created by 
-#' `fixest::feols()` that use `feols()` multiple-estimation interface). 
-#' Currently, the command is restricted to two-sided hypotheses and one-way 
-#' clustered standard errors. For the wild cluster bootstrap, 
-#' the null is always imposed.
-#' @param models An object of type fixest_multi or a list of objects of 
-#'        type fixest
+#' for objects of type `fixest_multi` (`fixest_multi` are objects created by 
+#' `fixest::feols()` that use `feols()` multiple-estimation interface).  
+#' The null hypothesis is always imposed on the bootstrap dgp.
+#' 
+#' @param models An object of type `fixest_multi` or a list of objects of 
+#'        type `fixest`, estimated via ordinary least squares (OLS)
 #' @param param The regression parameter to be tested
 #' @param R Hypothesis Vector giving linear combinations of coefficients.
 #'  Must be either NULL or a vector of the same length as `param`. 
 #'  If NULL, a vector of ones of length param.
 #' @param r A numeric. Shifts the null hypothesis 
-#'        H0: param = r vs H1: param != r  
+#'        H0: `param.` = r vs H1: `param.` != r  
 #' @param B The number of bootstrap iterations
 #' @param p_val_type Character vector of length 1. Type of hypothesis test 
-#'        By default "two-tailed". Other options include "equal-tailed", ">"
-#'         (for one-sided tests) and "<" (for two-sided tests). 
+#'        By default "two-tailed". Other options include "equal-tailed"
+#'         (for one-sided tests), ">" and "<" (for two-sided tests). 
 #' @param weights_type character or function. The character string specifies 
 #' the type of bootstrap to use: One of "rademacher", "mammen", "norm"
 #' and "webb". Alternatively, type can be a function(n) for drawing 
@@ -27,12 +26,11 @@
 #' possible combination once (enumeration). 
 #' @param bootstrap_type Either "11", "13", "31", "33", or "fnw11". 
 #' "fnw11" by default. See `?fwildclusterboot::boottest` for more details  
-#' @param seed Integer. Sets the random seed. NULL by default. 
-#' @param engine Should the wild cluster bootstrap run via fwildclusterboot's R 
-#'        implementation or via WildBootTests.jl? 'R' by default. 
-#'        The other option is 'WildBootTests.jl'. Running the bootstrap through 
-#'        WildBootTests.jl might significantly reduce the runtime of `rwolf()` 
-#'        for complex problems (e.g. problems with more than 500 clusters).
+#' @param engine Should the wild cluster bootstrap run via `fwildclusterboot's`
+#'  R implementation or via `WildBootTests.jl`? 'R' by default. 
+#'  The other option is `WildBootTests.jl`. Running the bootstrap through 
+#'  `WildBootTests.jl` might significantly reduce the runtime of `rwolf()` 
+#'  for complex problems (e.g. problems with more than 500 clusters).
 #' @param nthreads Integer. The number of threads to use when running the 
 #' bootstrap.
 #' @param ... additional function values passed to the bootstrap function. 
@@ -54,6 +52,11 @@
 #' \item{Pr(>|t|)}{The uncorrected pvalue for `param` in the respective model.}
 #' \item{RW Pr(>|t|)}{The Romano-Wolf corrected pvalue of hypothesis test for `param` in the respective model.}
 #' 
+#' @section Setting Seeds and Random Number Generation:
+#' 
+#' To guarantee reproducibility, please set a global random seeds via
+#' `set.seed()`.
+#' 
 #' @examples
 #'  
 #' library(fixest)
@@ -86,6 +89,8 @@
 #' @references 
 #' Clarke, Romano & Wolf (2019), STATA Journal. 
 #' IZA working paper: https://ftp.iza.org/dp12845.pdf
+#' 
+
 
 rwolf <- function(
     models,
@@ -95,7 +100,6 @@ rwolf <- function(
     r = 0,
     p_val_type = "two-tailed",
     weights_type = "rademacher",
-    seed = NULL, 
     engine = "R",
     nthreads = 1,
     bootstrap_type = "fnw11",
@@ -109,14 +113,10 @@ rwolf <- function(
   check_arg(weights_type, "charin(rademacher, mammen, webb, norm)")
   check_arg(bootstrap_type, "charin(11, 12, 13, 31, 33, fnw11)")
   check_arg(B, "integer scalar GT{99}")
-  check_arg(seed, "integer scalar | NULL")
   check_arg(engine, "charin(R, R-lean, WildBootTests.jl)")
   check_arg(nthreads, "scalar integer")
-
-  if(is.null(seed)){
-    seed <- sample.int(2147483647L, 1)
-  }
-
+
+
   if (inherits(param, "formula")) {
     param <- attr(terms(param), "term.labels")
   }
@@ -153,25 +153,35 @@ rwolf <- function(
   ses <- unlist(
     lapply(1:S, function(x) get_stats_fixest(x, stat = "Std. Error")))
   # absolute value for t-stats
-  t_stats <- abs(
-    unlist(lapply(1:S, function(x) get_stats_fixest(x, stat = "t value"))))
+  # t_stats <- abs(
+  #   unlist(lapply(1:S, function(x) get_stats_fixest(x, stat = "t value"))))
 
   # repeat line: for multiway clustering, it is not clear how many bootstrap 
   # test statistics will be invalied - for oneway, 
   # all vectors of length(boot_coefs) \leq B
 
   boot_coefs <- boot_ses <- matrix(NA, B, S) 
+  t_stats <- rep(NA, S)
   boot_t_stats <- list()
 
   # boottest() over all models for param
   pb <- txtProgressBar(min = 0, max = S, style = 3)
 
+  # reset global seed state once exciting the function
+  global_seed <- .Random.seed
+  on.exit(set.seed(global_seed))
+
+  internal_seed <- sample.int(.Machine$integer.max, 1L)
+
   res <- 
     lapply(seq_along(models), 
            function(x){
 
-             setTxtProgressBar(pb, x)
+             # set seed, to guarantee that all S calls to 
+             # boottest() generate the same weight matrices
+             # affects global seed outside of 'rwolf()'!
 
+             set.seed(internal_seed)
              clustid <- models[[x]]$call$cluster
 
              boottest_quote <-
@@ -185,7 +195,7 @@ rwolf <- function(
                    engine = engine,
                    p_val_type = p_val_type,
                    type = weights_type, 
-                   seed = seed
+                   sampling = "standard"
                  )
                )
 
@@ -198,15 +208,20 @@ rwolf <- function(
              }
 
              suppressMessages(
-               boottest_eval <- eval(boottest_quote)
+              boottest_eval <- 
+                  eval(boottest_quote)
              )
+
+             setTxtProgressBar(pb, x)
 
+             boottest_eval
+
            })
 
   for(x in seq_along(models)){
     # take absolute values of bootstrap t statistics
-    t_stats[x] <- abs(res[[x]]$t_stat)
-    boot_t_stats[[x]] <- abs(res[[x]]$t_boot)     
+    t_stats[x] <- (res[[x]]$t_stat)
+    boot_t_stats[[x]] <- (res[[x]]$t_boot)     
   }
 
   boot_t_stats <- Reduce(cbind, boot_t_stats)

diff --git a/R/rwolf_sim.R b/R/rwolf_sim.R
@@ -160,21 +160,18 @@ run_fwer_sim <- function(
   #'  argument`rho` for more information.}
   #'  
   #' @examples
-  #' \dontrun{
+  #' \donttest{
   #'
+  #' # N, B, n_sims, chosen so that the example runs quicker
+  #' # for a higher quality simulation, increase all values
   #' res <- run_fwer_sim(
   #'   seed = 123,
-  #'   n_sims = 1000,
-  #'   B = 4999,
-  #'   N = 1000,
-  #'   s = 10
+  #'   n_sims = 10,
+  #'   B = 199,
+  #'   N = 100,
+  #'   s = 10, 
+  #'   rho = 0
   #' )
-  #' res
-  #' 
-  #' # >                 reject_5 reject_10 rho
-  #' # > fit_pvalue         0.282     0.502 0.5
-  #' # > fit_pvalue_holm    0.061     0.104 0.5
-  #' # > fit_padjust_rw     0.059     0.105 0.5
   #'
   #' }
 

diff --git a/README.Rmd b/README.Rmd
@@ -18,8 +18,9 @@ knitr::opts_chunk$set(
 
 <!-- badges: start -->
 [![R-CMD-check](https://github.com/s3alfisc/wildrwolf/workflows/R-CMD-check/badge.svg)](https://github.com/s3alfisc/wildrwolf/actions)
-[![pkgcheck](https://github.com/s3alfisc/wildrwolf/workflows/pkgcheck/badge.svg)](https://github.com/s3alfisc/wildrwolf/actions?query=workflow%3Apkgcheck)
+[![](http://cranlogs.r-pkg.org/badges/last-month/wildrwolf)](https://cran.r-project.org/package=wildrwolf)
 [![Lifecycle: experimental](https://img.shields.io/badge/lifecycle-experimental-orange.svg)](https://lifecycle.r-lib.org/articles/stages.html)
+[![](https://www.r-pkg.org/badges/version/wildrwolf)](https://cran.r-project.org/package=wildrwolf)
 ![runiverse-package](https://s3alfisc.r-universe.dev/badges/wildrwolf)
 [![Codecov test coverage](https://codecov.io/gh/s3alfisc/wildrwolf/branch/main/graph/badge.svg)](https://app.codecov.io/gh/s3alfisc/wildrwolf?branch=main)
 <!-- badges: end -->
@@ -34,9 +35,11 @@ Adding support for multi-way clustering is work in progress.
 
 ## Installation
 
-You can install the development version from [GitHub](https://github.com/) with:
+You can install the package from CRAN and the development version from [GitHub](https://github.com/) with:
 
 ``` r
+install.packages("wildrwolf")
+
 # install.packages("devtools")
 devtools::install_github("s3alfisc/wildrwolf")
 
@@ -89,8 +92,7 @@ rm(list= ls()[!(ls() %in% c('fit','data'))])
 res_rwolf1 <- wildrwolf::rwolf(
   models = fit,
   param = "X1", 
-  B = 9999, 
-  seed = 23
+  B = 9999
 )
 
 pvals <- lapply(fit, function(x) pvalue(x)["X1"]) |> unlist()
@@ -127,8 +129,7 @@ if(requireNamespace("microbenchmark")){
     "Romano-Wolf" = wildrwolf::rwolf(
       models = fit,
       param = "X1", 
-      B = 9999, 
-      seed = 23
+      B = 9999 
     ), 
     times = 1
   )
@@ -233,7 +234,7 @@ models <- feols(c(Y1, Y2, Y3, Y4) ~ X1 + X2
 ```
 
 ```{r}
-rwolf(models, param = "X1", B = 9999, seed = 123)
+rwolf(models, param = "X1", B = 9999)
 
 ```