diff --git a/.Rbuildignore b/.Rbuildignore
index 691c7f148..8c1f77aae 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -7,8 +7,8 @@
^docs$
^pkgdown$
^man-roxygen$
-^dev-helpers\.R$
^LICENSE\.md$
^\.appveyor\.yml$
^\.github$
^vignettes/articles-online-only$
+^release-prep\.R$
diff --git a/.gitignore b/.gitignore
index 8dc61d80a..db3f1c32c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,6 @@
.Ruserdata
.vscode/*
-design-questions/*
inst/doc
dev-helpers.R
+release-prep.R
diff --git a/DESCRIPTION b/DESCRIPTION
index 46a5814db..b656d0d98 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
Package: cmdstanr
Title: R Interface to 'CmdStan'
-Version: 0.5.3
-Date: 2022-04-24
+Version: 0.6.0
+Date: 2023-07-25
Authors@R:
c(person(given = "Jonah", family = "Gabry", role = c("aut", "cre"),
email = "jsg2201@columbia.edu"),
diff --git a/NEWS.md b/NEWS.md
index 495c4d477..863988233 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,37 @@
+# cmdstanr 0.6.0
+
+### Major new features
+
+* New `expose_functions()` method to expose Stan functions to R by @andrjohns in #702. See `?expose_functions`.
+* New methods for accessing log_prob, grad_log_prob, hessian, un/constrain variables by @andrjohns in #701. See `?init_model_methods`.
+
+### Other changes
+
+* `mod$variables` works with includes in precompile state (fixes #680) by @MKyhos in #682
+* Update broken link for Stan OpenCL support page by @erictleung in #686
+* Add newline to check syntax output by @rok-cesnovar in #689
+* Allow exposing functions without sampling by @andrjohns in #705
+* Expose skeleton by @andrjohns in #706
+* WSL - Run cmdstan and models under WSL filesystem by @andrjohns in #696
+* Bugfix - Deep copy method/function environments by @andrjohns in #709
+* Add option for including jacobian adjustments in hessian method by @andrjohns in #710
+* WSL Optimisations and Bugfixes for CI by @andrjohns in #711
+* Add `stancflags` from `make/local` by @rok-cesnovar in #690
+* Update co-authors by @andrjohns in #715
+* Update model methods parameter naming and extract skeleton function by @andrjohns in #724
+* Add method for unconstraining all parameter draws by @andrjohns in #729
+* Improve efficiency of variable matching by @sbfnk in #736
+* Add verbosity to download output and errors by @andrjohns in #745
+* Update handling of show_messages, add show_exceptions by @andrjohns in #746
+* Rtools43 support by @andrjohns in #755
+* Add stanc M1 make patch, suppress boost warnings by @andrjohns in #756
+* More examples of the summary method by @gravesti in #751
+* Fix model$format and model$check_syntax for compiled models with include-paths by @adrian-lison in #775
+* Generalise RTools config/support by @andrjohns in #777
+* New posterior vignette by @gravesti in #719
+* Add moment-matching support to $loo() method by @andrjohns in #778
+* Replace `\` with `function` by @jsocolar in #789
+
# cmdstanr 0.5.3
### New features
diff --git a/R/csv.R b/R/csv.R
index 9ab528e1f..5d3824428 100644
--- a/R/csv.R
+++ b/R/csv.R
@@ -422,7 +422,7 @@ read_cmdstan_csv <- function(files,
#' Read CmdStan CSV files from sampling into \R
#'
#' Deprecated. Use [read_cmdstan_csv()] instead.
-#'
+#' @keywords internal
#' @export
#' @param files,variables,sampler_diagnostics Deprecated. Use
#' [read_cmdstan_csv()] instead.
diff --git a/R/example.R b/R/example.R
index 388c555ef..755f33703 100644
--- a/R/example.R
+++ b/R/example.R
@@ -188,7 +188,7 @@ write_stan_file <- function(code,
#' Write Stan code to a temporary file
#'
#' This function is deprecated. Please use [write_stan_file()] instead.
-#'
+#' @keywords internal
#' @export
#' @inheritParams write_stan_file
write_stan_tempfile <- function(code, dir = tempdir()) {
diff --git a/R/fit.R b/R/fit.R
index 0bffa1cc5..f9dd4d8d2 100644
--- a/R/fit.R
+++ b/R/fit.R
@@ -82,7 +82,7 @@ CmdStanFit <- R6::R6Class(
invisible(self)
},
expose_functions = function(global = FALSE, verbose = FALSE) {
- expose_functions(self$functions, global, verbose)
+ expose_stan_functions(self$functions, global, verbose)
invisible(NULL)
}
),
@@ -301,12 +301,18 @@ init <- function() {
CmdStanFit$set("public", name = "init", value = init)
#' Compile additional methods for accessing the model log-probability function
-#' and parameter constraining and unconstraining. This requires the `Rcpp` package.
+#' and parameter constraining and unconstraining.
#'
#' @name fit-method-init_model_methods
#' @aliases init_model_methods
-#' @description The `$init_model_methods()` compiles and initializes the
-#' `log_prob`, `grad_log_prob`, `constrain_variables`, and `unconstrain_variables` functions.
+#'
+#' @description The `$init_model_methods()` method compiles and initializes the
+#' `log_prob`, `grad_log_prob`, `constrain_variables`, `unconstrain_variables`
+#' and `unconstrain_draws` functions. These are then available as methods of
+#' the fitted model object. This requires the `Rcpp` package.
+#'
+#' Note: there may be many compiler warnings emitted during compilation but
+#' these can be ignored so long as they are warnings and not errors.
#'
#' @param seed (integer) The random seed to use when initializing the model.
#' @param verbose (boolean) Whether to show verbose logging during compilation.
@@ -317,6 +323,9 @@ CmdStanFit$set("public", name = "init", value = init)
#' fit_mcmc <- cmdstanr_example("logistic", method = "sample")
#' fit_mcmc$init_model_methods()
#' }
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
#'
init_model_methods <- function(seed = 0, verbose = FALSE, hessian = FALSE) {
if (os_is_wsl()) {
@@ -358,9 +367,13 @@ CmdStanFit$set("public", name = "init_model_methods", value = init_model_methods
#' \dontrun{
#' fit_mcmc <- cmdstanr_example("logistic", method = "sample")
#' fit_mcmc$init_model_methods()
-#' fit_mcmc$log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1))
+#' fit_mcmc$log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2))
#' }
#'
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
+#'
log_prob <- function(unconstrained_variables, jacobian_adjustment = TRUE) {
if (is.null(private$model_methods_env_$model_ptr)) {
stop("The method has not been compiled, please call `init_model_methods()` first",
@@ -383,18 +396,22 @@ CmdStanFit$set("public", name = "log_prob", value = log_prob)
#' @description The `$grad_log_prob()` method provides access to the
#' Stan model's `log_prob` function and its derivative
#'
-#' @param unconstrained_variables (numeric) A vector of unconstrained parameters to be passed
-#' to `grad_log_prob`
-#' @param jacobian_adjustment (bool) Whether to include the log-density adjustments from
-#' un/constraining variables
+#' @param unconstrained_variables (numeric) A vector of unconstrained parameters
+#' to be passed to `grad_log_prob`.
+#' @param jacobian_adjustment (bool) Whether to include the log-density
+#' adjustments from un/constraining variables.
#'
#' @examples
#' \dontrun{
#' fit_mcmc <- cmdstanr_example("logistic", method = "sample")
#' fit_mcmc$init_model_methods()
-#' fit_mcmc$grad_log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1))
+#' fit_mcmc$grad_log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2))
#' }
#'
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
+#'
grad_log_prob <- function(unconstrained_variables, jacobian_adjustment = TRUE) {
if (is.null(private$model_methods_env_$model_ptr)) {
stop("The method has not been compiled, please call `init_model_methods()` first",
@@ -417,18 +434,22 @@ CmdStanFit$set("public", name = "grad_log_prob", value = grad_log_prob)
#' @description The `$hessian()` method provides access to the
#' Stan model's `log_prob`, its derivative, and its hessian
#'
-#' @param unconstrained_variables (numeric) A vector of unconstrained parameters to be passed
-#' to `hessian`
-#' @param jacobian_adjustment (bool) Whether to include the log-density adjustments from
-#' un/constraining variables
+#' @param unconstrained_variables (numeric) A vector of unconstrained parameters
+#' to be passed to `hessian`.
+#' @param jacobian_adjustment (bool) Whether to include the log-density
+#' adjustments from un/constraining variables.
#'
#' @examples
#' \dontrun{
-#' fit_mcmc <- cmdstanr_example("logistic", method = "sample")
-#' fit_mcmc$init_model_methods()
-#' fit_mcmc$hessian(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1))
+#' # fit_mcmc <- cmdstanr_example("logistic", method = "sample")
+#' # fit_mcmc$init_model_methods(hessian = TRUE)
+#' # fit_mcmc$hessian(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2))
#' }
#'
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
+#'
hessian <- function(unconstrained_variables, jacobian_adjustment = TRUE) {
if (is.null(private$model_methods_env_$model_ptr)) {
stop("The method has not been compiled, please call `init_model_methods()` first",
@@ -450,8 +471,8 @@ CmdStanFit$set("public", name = "hessian", value = hessian)
#' @description The `$unconstrain_variables()` method transforms input parameters to
#' the unconstrained scale
#'
-#' @param variables (list) A list of parameter values to transform, in the same format as
-#' provided to the `init` argument of the `$sample()` method
+#' @param variables (list) A list of parameter values to transform, in the same
+#' format as provided to the `init` argument of the `$sample()` method.
#'
#' @examples
#' \dontrun{
@@ -460,6 +481,10 @@ CmdStanFit$set("public", name = "hessian", value = hessian)
#' fit_mcmc$unconstrain_variables(list(alpha = 0.5, beta = c(0.7, 1.1, 0.2)))
#' }
#'
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
+#'
unconstrain_variables <- function(variables) {
if (is.null(private$model_methods_env_$model_ptr)) {
stop("The method has not been compiled, please call `init_model_methods()` first",
@@ -521,6 +546,10 @@ CmdStanFit$set("public", name = "unconstrain_variables", value = unconstrain_var
#' unconstrained_draws <- fit_mcmc$unconstrain_draws(draws = fit_mcmc$draws())
#' }
#'
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
+#'
unconstrain_draws <- function(files = NULL, draws = NULL) {
if (!is.null(files) || !is.null(draws)) {
if (!is.null(files) && !is.null(draws)) {
@@ -565,6 +594,7 @@ unconstrain_draws <- function(files = NULL, draws = NULL) {
self$unconstrain_variables(variables = par_list)
})
})
+ unconstrained
}
CmdStanFit$set("public", name = "unconstrain_draws", value = unconstrain_draws)
@@ -587,6 +617,10 @@ CmdStanFit$set("public", name = "unconstrain_draws", value = unconstrain_draws)
#' fit_mcmc$variable_skeleton()
#' }
#'
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
+#'
variable_skeleton <- function(transformed_parameters = TRUE, generated_quantities = TRUE) {
if (is.null(private$model_methods_env_$model_ptr)) {
stop("The method has not been compiled, please call `init_model_methods()` first",
@@ -607,19 +641,24 @@ CmdStanFit$set("public", name = "variable_skeleton", value = variable_skeleton)
#' @description The `$constrain_variables()` method transforms input parameters to
#' the constrained scale
#'
-#' @param unconstrained_variables (numeric) A vector of unconstrained parameters to constrain
-#' @param transformed_parameters (boolean) Whether to return transformed parameters
-#' implied by newly-constrained parameters (defaults to TRUE)
+#' @param unconstrained_variables (numeric) A vector of unconstrained parameters
+#' to constrain.
+#' @param transformed_parameters (boolean) Whether to return transformed
+#' parameters implied by newly-constrained parameters (defaults to TRUE).
#' @param generated_quantities (boolean) Whether to return generated quantities
-#' implied by newly-constrained parameters (defaults to TRUE)
+#' implied by newly-constrained parameters (defaults to TRUE).
#'
#' @examples
#' \dontrun{
#' fit_mcmc <- cmdstanr_example("logistic", method = "sample")
#' fit_mcmc$init_model_methods()
-#' fit_mcmc$constrain_variables(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1))
+#' fit_mcmc$constrain_variables(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2))
#' }
#'
+#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()],
+#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()],
+#' [hessian()]
+#'
constrain_variables <- function(unconstrained_variables, transformed_parameters = TRUE,
generated_quantities = TRUE) {
if (is.null(private$model_methods_env_$model_ptr)) {
@@ -1233,6 +1272,20 @@ CmdStanFit$set("public", name = "code", value = code)
#' [`$time()`][fit-method-time] | Report total and chain-specific run times. |
#' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. |
#'
+#' ## Expose Stan functions and additional methods to R
+#'
+#' |**Method**|**Description**|
+#' |:----------|:---------------|
+#' [`$expose_functions()`][fit-method-expose_functions] | Expose Stan functions for use in R. |
+#' [`$init_model_methods()`][fit-method-init_model_methods] | Expose methods for log-probability, gradients, parameter constraining and unconstraining. |
+#' [`$log_prob()`][fit-method-log_prob] | Calculate log-prob. |
+#' [`$grad_log_prob()`][fit-method-grad_log_prob] | Calculate log-prob and gradient. |
+#' [`$hessian()`][fit-method-hessian] | Calculate log-prob, gradient, and hessian. |
+#' [`$constrain_variables()`][fit-method-constrain_variables] | Transform a set of unconstrained parameter values to the constrained scale. |
+#' [`$unconstrain_variables()`][fit-method-unconstrain_variables] | Transform a set of parameter values to the unconstrained scale. |
+#' [`$unconstrain_draws()`][fit-method-unconstrain_draws] | Transform all parameter draws to the unconstrained scale. |
+#' [`$variable_skeleton()`][fit-method-variable_skeleton] | Helper function to re-structure a vector of constrained parameter values. |
+#'
CmdStanMCMC <- R6::R6Class(
classname = "CmdStanMCMC",
inherit = CmdStanFit,
@@ -1718,6 +1771,20 @@ CmdStanMCMC$set("public", name = "num_chains", value = num_chains)
#' [`$output()`][fit-method-output] | Pretty print the output that was printed to the console. |
#' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. |
#'
+#' ## Expose Stan functions and additional methods to R
+#'
+#' |**Method**|**Description**|
+#' |:----------|:---------------|
+#' [`$expose_functions()`][fit-method-expose_functions] | Expose Stan functions for use in R. |
+#' [`$init_model_methods()`][fit-method-init_model_methods] | Expose methods for log-probability, gradients, parameter constraining and unconstraining. |
+#' [`$log_prob()`][fit-method-log_prob] | Calculate log-prob. |
+#' [`$grad_log_prob()`][fit-method-grad_log_prob] | Calculate log-prob and gradient. |
+#' [`$hessian()`][fit-method-hessian] | Calculate log-prob, gradient, and hessian. |
+#' [`$constrain_variables()`][fit-method-constrain_variables] | Transform a set of unconstrained parameter values to the constrained scale. |
+#' [`$unconstrain_variables()`][fit-method-unconstrain_variables] | Transform a set of parameter values to the unconstrained scale. |
+#' [`$unconstrain_draws()`][fit-method-unconstrain_draws] | Transform all parameter draws to the unconstrained scale. |
+#' [`$variable_skeleton()`][fit-method-variable_skeleton] | Helper function to re-structure a vector of constrained parameter values. |
+#'
CmdStanMLE <- R6::R6Class(
classname = "CmdStanMLE",
inherit = CmdStanFit,
@@ -1821,6 +1888,20 @@ CmdStanMLE$set("public", name = "mle", value = mle)
#' [`$output()`][fit-method-output] | Pretty print the output that was printed to the console. |
#' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. |
#'
+#' ## Expose Stan functions and additional methods to R
+#'
+#' |**Method**|**Description**|
+#' |:----------|:---------------|
+#' [`$expose_functions()`][fit-method-expose_functions] | Expose Stan functions for use in R. |
+#' [`$init_model_methods()`][fit-method-init_model_methods] | Expose methods for log-probability, gradients, parameter constraining and unconstraining. |
+#' [`$log_prob()`][fit-method-log_prob] | Calculate log-prob. |
+#' [`$grad_log_prob()`][fit-method-grad_log_prob] | Calculate log-prob and gradient. |
+#' [`$hessian()`][fit-method-hessian] | Calculate log-prob, gradient, and hessian. |
+#' [`$constrain_variables()`][fit-method-constrain_variables] | Transform a set of unconstrained parameter values to the constrained scale. |
+#' [`$unconstrain_variables()`][fit-method-unconstrain_variables] | Transform a set of parameter values to the unconstrained scale. |
+#' [`$unconstrain_draws()`][fit-method-unconstrain_draws] | Transform all parameter draws to the unconstrained scale. |
+#' [`$variable_skeleton()`][fit-method-variable_skeleton] | Helper function to re-structure a vector of constrained parameter values. |
+#'
CmdStanVB <- R6::R6Class(
classname = "CmdStanVB",
inherit = CmdStanFit,
diff --git a/R/model.R b/R/model.R
index e95c13759..f32f7e424 100644
--- a/R/model.R
+++ b/R/model.R
@@ -188,6 +188,7 @@ cmdstan_model <- function(stan_file = NULL, exe_file = NULL, compile = TRUE, ...
#' [`$exe_file()`][model-method-compile] | Return the file path to the compiled executable. |
#' [`$hpp_file()`][model-method-compile] | Return the file path to the `.hpp` file containing the generated C++ code. |
#' [`$save_hpp_file()`][model-method-compile] | Save the `.hpp` file containing the generated C++ code. |
+#' [`$expose_functions()`][model-method-expose_functions] | Expose Stan functions for use in R. |
#'
#' ## Model fitting
#'
@@ -325,10 +326,6 @@ CmdStanModel <- R6::R6Class(
"- ", new_hpp_loc)
private$hpp_file_ <- new_hpp_loc
invisible(private$hpp_file_)
- },
- expose_functions = function(global = FALSE, verbose = FALSE) {
- expose_functions(self$functions, global, verbose)
- invisible(NULL)
}
)
)
@@ -391,10 +388,16 @@ CmdStanModel <- R6::R6Class(
#' not modified since last compiled. The default is `FALSE`. Can also be set
#' via a global `cmdstanr_force_recompile` option.
#' @param compile_model_methods (logical) Compile additional model methods
-#' (`log_prob()`, `grad_log_prob()`, `constrain_pars()`, `unconstrain_pars()`)
+#' (`log_prob()`, `grad_log_prob()`, `constrain_variables()`,
+#' `unconstrain_variables()`).
#' @param compile_hessian_method (logical) Should the (experimental) `hessian()` method be
#' be compiled with the model methods?
-#' @param compile_standalone (logical) Should functions in the Stan model be compiled for used in R?
+#' @param compile_standalone (logical) Should functions in the Stan model be
+#' compiled for use in R? If `TRUE` the functions will be available via the
+#' `functions` field in the compiled model object. This can also be done after
+#' compilation using the
+#' [`$expose_functions()`][model-method-expose_functions] method.
+#'
#' @param threads Deprecated and will be removed in a future release. Please
#' turn on threading via `cpp_options = list(stan_threads = TRUE)` instead.
#'
@@ -584,7 +587,7 @@ compile <- function(quiet = TRUE,
self$functions$hpp_code <- get_standalone_hpp(temp_stan_file, stancflags_standalone)
self$functions$external <- !is.null(user_header)
if (compile_standalone) {
- expose_functions(self$functions, !quiet)
+ expose_stan_functions(self$functions, !quiet)
}
stancflags_val <- paste0("STANCFLAGS += ", stancflags_val, paste0(" ", stancflags_combined, collapse = " "))
withr::with_path(
@@ -1749,6 +1752,63 @@ diagnose <- function(data = NULL,
}
CmdStanModel$set("public", name = "diagnose", value = diagnose)
+#' Expose Stan functions to R
+#'
+#' @name model-method-expose_functions
+#' @aliases expose_functions fit-method-expose_functions
+#' @family CmdStanModel methods
+#'
+#' @description The `$expose_functions()` method of a [`CmdStanModel`] object
+#' will compile the functions in the Stan program's `functions` block and
+#' expose them for use in \R. This can also be specified via the
+#' `compile_standalone` argument to the [`$compile()`][model-method-compile]
+#' method.
+#'
+#' This method is also available for fitted model objects ([`CmdStanMCMC`], [`CmdStanVB`], etc.).
+#' See **Examples**.
+#'
+#' Note: there may be many compiler warnings emitted during compilation but
+#' these can be ignored so long as they are warnings and not errors.
+#'
+#' @param global (logical) Should the functions be added to the Global
+#' Environment? The default is `FALSE`, in which case the functions are
+#' available via the `functions` field of the R6 object.
+#' @param verbose (logical) Should detailed information about generated code be
+#' printed to the console? Defaults to `FALSE`.
+#' @template seealso-docs
+#' @examples
+#' \dontrun{
+#' stan_file <- write_stan_file(
+#' "
+#' functions {
+#' real a_plus_b(real a, real b) {
+#' return a + b;
+#' }
+#' }
+#' parameters {
+#' real x;
+#' }
+#' model {
+#' x ~ std_normal();
+#' }
+#' "
+#' )
+#' mod <- cmdstan_model(stan_file)
+#' mod$expose_functions()
+#' mod$functions$a_plus_b(1, 2)
+#'
+#' fit <- mod$sample(refresh = 0)
+#' fit$expose_functions() # already compiled via mod$expose_functions() above; otherwise this would compile them
+#' fit$functions$a_plus_b(1, 2)
+#' }
+#'
+#'
+expose_functions <- function(global = FALSE, verbose = FALSE) {
+  expose_stan_functions(self$functions, global, verbose) # helper in R/utils.R
+  invisible(NULL) # called for its side effects; nothing to return
+}
+CmdStanModel$set("public", name = "expose_functions", value = expose_functions)
+
# internal ----------------------------------------------------------------
diff --git a/R/utils.R b/R/utils.R
index 3eef2590d..7e732c0a5 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -235,7 +235,7 @@ generate_file_names <-
#' Set or get the number of threads used to execute Stan models
#'
#' DEPRECATED. Please use the `threads_per_chain` argument when fitting the model.
-#'
+#' @keywords internal
#' @name stan_threads
NULL
@@ -907,7 +907,7 @@ compile_functions <- function(env, verbose = FALSE, global = FALSE) {
invisible(NULL)
}
-expose_functions <- function(function_env, global = FALSE, verbose = FALSE) {
+expose_stan_functions <- function(function_env, global = FALSE, verbose = FALSE) {
if (os_is_wsl()) {
stop("Standalone functions are not currently available with ",
"WSL CmdStan and will not be compiled",
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 2f99caefa..ca5375be8 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -70,6 +70,7 @@ articles:
and using CmdStanR in R Markdown documents.
contents:
- cmdstanr-internals
+ - posterior
- r-markdown
- deprecations
- profiling
@@ -110,6 +111,8 @@ reference:
- write_stan_json
- write_stan_file
- draws_to_csv
+ - as_mcmc.list
+ - as_draws.CmdStanMCMC
- title: "Using CmdStanR with knitr and R Markdown"
contents:
- register_knitr_engine
diff --git a/docs/404.html b/docs/404.html
index 0b36386a3..ced1d14e2 100644
--- a/docs/404.html
+++ b/docs/404.html
@@ -1,74 +1,34 @@
-
-
-
Copyright (c) 2019, Stan Developers and their Assignees
-All rights reserved.
+
Copyright (c) 2019, Stan Developers and their Assignees All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
-
-
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
+
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission.
-
-
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This vignette demonstrates how to use the OpenCL capabilities of CmdStan with CmdStanR. The functionality described in this vignette requires CmdStan 2.26.1 or newer.
-
As of version 2.26.1, users can expect speedups with OpenCL when using vectorized probability distribution functions (functions with the _lpdf or _lpmf suffix) and when the input variables contain at least 20,000 elements.
-
The actual speedup for a model will depend on the particular lpdf/lpmf functions used and whether the lpdf/lpmf functions are the bottlenecks of the model. The more computationally complex the function is, the larger the expected speedup. The biggest speedups are expected when using the specialized GLM functions.
-
In order to establish the bottlenecks in your model we recommend using profiling, which was introduced in Stan version 2.26.0.
+
This vignette demonstrates how to use the OpenCL capabilities of
+CmdStan with CmdStanR. The functionality described in this vignette
+requires CmdStan 2.26.1 or newer.
+
As of version 2.26.1, users can expect speedups with OpenCL when
+using vectorized probability distribution functions (functions with the
+_lpdf or _lpmf suffix) and when the input
+variables contain at least 20,000 elements.
+
The actual speedup for a model will depend on the particular
+lpdf/lpmf functions used and whether the
+lpdf/lpmf functions are the bottlenecks of the model. The
+more computationally complex the function is, the larger the expected
+speedup. The biggest speedups are expected when using the specialized
+GLM functions.
+
In order to establish the bottlenecks in your model we recommend
+using profiling,
+which was introduced in Stan version 2.26.0.
OpenCL runtime
-
OpenCL is supported on most modern CPUs and GPUs. In order to use OpenCL in CmdStanR, an OpenCL runtime for the target device must be installed. A guide for the most common devices is available in the CmdStan manual’s chapter on parallelization.
-
In case of using Windows, CmdStan requires the OpenCL.lib to compile the model. If you experience issue compiling the model with OpenCL, run the below script and set path_to_opencl_lib to the path to the OpenCL.lib file on your system. If you are using CUDA, the path should be similar to the one listed here.
+
OpenCL is supported on most modern CPUs and GPUs. In order to use
+OpenCL in CmdStanR, an OpenCL runtime for the target device must be
+installed. A guide for the most common devices is available in the
+CmdStan manual’s chapter
+on parallelization.
+
In case of using Windows, CmdStan requires the
+OpenCL.lib to compile the model. If you experience issue
+compiling the model with OpenCL, run the below script and set
+path_to_opencl_lib to the path to the
+OpenCL.lib file on your system. If you are using CUDA, the
+path should be similar to the one listed here.
By default, models in CmdStanR are compiled without OpenCL support. Once OpenCL support is enabled, a CmdStan model will make use of OpenCL if the functions in the model support it. Technically no changes to a model are required to support OpenCL since the choice of using OpenCL is handled by the compiler, but it can still be useful to rewrite a model to be more OpenCL-friendly by using vectorization as much as possible when using probability distributions.
-
Consider a simple logistic regression with parameters alpha and beta, covariates X, and outcome y.
+
By default, models in CmdStanR are compiled without OpenCL
+support. Once OpenCL support is enabled, a CmdStan model will make use
+of OpenCL if the functions in the model support it. Technically no
+changes to a model are required to support OpenCL since the choice of
+using OpenCL is handled by the compiler, but it can still be useful to
+rewrite a model to be more OpenCL-friendly by using vectorization as
+much as possible when using probability distributions.
+
Consider a simple logistic regression with parameters
+alpha and beta, covariates X, and
+outcome y.
data {
int<lower=1> k;
int<lower=0> n;
@@ -191,8 +221,14 @@
X<-matrix(rnorm(n*k), ncol =k)y<-rbinom(n, size =1, prob =plogis(3*X[,1]-2*X[,2]+1))mdata<-list(k =k, n =n, y =y, X =X)
-
In this model, most of the computation will be handled by the bernoulli_logit_glm_lpmf function. Because this is a supported GPU function, it should be possible to accelerate it with OpenCL. Check here for a list of functions with OpenCL support.
-
To build the model with OpenCL support, add cpp_options = list(stan_opencl = TRUE) at the compilation step.
+
In this model, most of the computation will be handled by the
+bernoulli_logit_glm_lpmf function. Because this is a
+supported GPU function, it should be possible to accelerate it with
+OpenCL. Check here for a
+list of functions with OpenCL support.
+
To build the model with OpenCL support, add
+cpp_options = list(stan_opencl = TRUE) at the compilation
+step.
# Compile the model with STAN_OPENCL=TRUEmod_cl<-cmdstan_model("opencl-files/bernoulli_logit_glm.stan",
@@ -201,46 +237,60 @@
Running models with OpenCL
-
Running models with OpenCL requires specifying the OpenCL platform and device on which to run the model (there can be multiple). If the system has one GPU and no OpenCL CPU runtime, the platform and device IDs of the GPU are typically both 0, but the clinfo tool can be used to figure out for sure which devices are available.
-
On an Ubuntu system with both CPU and GPU OpenCL support, clinfo -l outputs:
+
Running models with OpenCL requires specifying the OpenCL platform
+and device on which to run the model (there can be multiple). If the
+system has one GPU and no OpenCL CPU runtime, the platform and device
+IDs of the GPU are typically both 0, but the
+clinfo tool can be used to figure out for sure which
+devices are available.
+
On an Ubuntu system with both CPU and GPU OpenCL support,
+clinfo -l outputs:
Platform #0: AMD Accelerated Parallel Processing
`-- Device #0: gfx906+sram-ecc
Platform #1: Intel(R) CPU Runtime for OpenCL(TM) Applications
`-- Device #0: Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz
-
On this system the GPU is platform ID 0 and device ID 0, while the CPU is platform ID 1, device ID 0. These can be specified with the opencl_ids argument when running a model. The opencl_ids is supplied as a vector of length 2, where the first element is the platform ID and the second argument is the device ID.
+
On this system the GPU is platform ID 0 and device ID 0, while the
+CPU is platform ID 1, device ID 0. These can be specified with the
+opencl_ids argument when running a model. The
+opencl_ids is supplied as a vector of length 2, where the
+first element is the platform ID and the second element is the device
+ID.
This speedup will be determined by the particular GPU/CPU used, the input problem sizes (data as well as parameters) and if the model uses functions that can be run on the GPU or other OpenCL devices.
+
[1] 5.065968
+
This speedup will be determined by the particular GPU/CPU used, the
+input problem sizes (data as well as parameters) and if the model uses
+functions that can be run on the GPU or other OpenCL devices.
Each variable is represented as a list containing the type
+information (currently limited to real or int)
+and the number of dimensions.
-variables$data$J
+variables$data$J
$type
[1] "int"
$dimensions
[1] 0
-variables$data$sigma
+variables$data$sigma
$type
[1] "real"
$dimensions
[1] 1
-variables$parameters$tau
+variables$parameters$tau
$type
[1] "real"
$dimensions
[1] 0
-variables$transformed_parameters$theta
+variables$transformed_parameters$theta
$type
[1] "real"
$dimensions
[1] 1
-
-
-Executable location
-
By default, the executable is created in the same directory as the file containing the Stan program. You can also specify a different location with the dir argument.
+
+
Executable location
+
+
By default, the executable is created in the same directory as the
+file containing the Stan program. You can also specify a different
+location with the dir argument.
-mod<-cmdstan_model(stan_file, dir ="path/to/directory/for/executable")
+mod<-cmdstan_model(stan_file, dir ="path/to/directory/for/executable")
-
-
-Processing data
-
There are three data formats that CmdStanR allows when fitting a model:
+
+
Processing data
+
+
There are three data formats that CmdStanR allows when fitting a
+model:
named list of R objects
JSON file
R dump file
-
-
-Named list of R objects
-
Like the RStan interface, CmdStanR accepts a named list of R objects where the names correspond to variables declared in the data block of the Stan program. In the Bernoulli model the data is N, the number of data points, and y an integer array of observations.
+
+
Named list of R objects
+
+
Like the RStan interface, CmdStanR accepts a named list of R objects
+where the names correspond to variables declared in the data block of
+the Stan program. In the Bernoulli model the data is N, the
+number of data points, and y an integer array of
+observations.
-# data block has 'N' and 'y'
-data_list<-list(N =10, y =c(0,1,0,0,0,0,0,0,0,1))
-fit<-mod$sample(data =data_list)
-
Because CmdStan doesn’t accept lists of R objects, CmdStanR will first write the data to a temporary JSON file using write_stan_json(). This happens internally, but it is also possible to call write_stan_json() directly.
+# data block has 'N' and 'y'
+data_list<-list(N =10, y =c(0,1,0,0,0,0,0,0,0,1))
+fit<-mod$sample(data =data_list)
+
Because CmdStan doesn’t accept lists of R objects, CmdStanR will
+first write the data to a temporary JSON file using
+write_stan_json(). This happens internally, but it is also
+possible to call write_stan_json() directly.
If you already have your data in a JSON file you can just pass that file directly to CmdStanR instead of using a list of R objects. For example, we could pass in the JSON file we created above using write_stan_json():
+
+
JSON file
+
+
If you already have your data in a JSON file you can just pass that
+file directly to CmdStanR instead of using a list of R objects. For
+example, we could pass in the JSON file we created above using
+write_stan_json():
-fit<-mod$sample(data =json_file)
+fit<-mod$sample(data =json_file)
-
-
-R dump file
-
Finally, it is also possible to use the R dump file format. This is not recommended because CmdStan can process JSON faster than R dump, but CmdStanR allows it because CmdStan will accept files created by rstan::stan_rdump():
+
+
R dump file
+
+
Finally, it is also possible to use the R dump file format. This is
+not recommended because CmdStan can process JSON faster than R
+dump, but CmdStanR allows it because CmdStan will accept files created
+by rstan::stan_rdump():
To save these files to a non-temporary location there are two options. You can either specify the output_dir argument to mod$sample() or use fit$save_output_files() after fitting the model.
+
+
Non-temporary files
+
+
To save these files to a non-temporary location there are two
+options. You can either specify the output_dir argument to
+mod$sample() or use fit$save_output_files()
+after fitting the model.
-# see ?save_output_files for info on optional arguments
-fit$save_output_files(dir ="path/to/directory")
+# see ?save_output_files for info on optional arguments
+fit$save_output_files(dir ="path/to/directory")
-fit<-mod$sample(
- data =data_list,
- output_dir ="path/to/directory"
-)
+fit<-mod$sample(
+ data =data_list,
+ output_dir ="path/to/directory"
+)
-
-
-Reading CmdStan output into R
-
-
-Lazy CSV reading
-
With the exception of some diagnostic information, the CSV files are not read into R until their contents are requested by calling a method that requires them (e.g., fit$draws(), fit$summary(), etc.). If we examine the structure of the fit object, notice how the Private slot draws_ is NULL, indicating that the CSV files haven’t yet been read into R.
+
+
Reading CmdStan output into R
+
+
+
Lazy CSV reading
+
+
With the exception of some diagnostic information, the CSV files are
+not read into R until their contents are requested by calling a method
+that requires them (e.g., fit$draws(),
+fit$summary(), etc.). If we examine the structure of the
+fit object, notice how the Private slot
+draws_ is NULL, indicating that the CSV files
+haven’t yet been read into R.
After we call a method that requires the draws then if we reexamine the structure of the object we will see that the draws_ slot in Private is no longer empty.
+
After we call a method that requires the draws, if we reexamine
+the structure of the object we will see that the draws_
+slot in Private is no longer empty.
-draws<-fit$draws()# force CSVs to be read into R
-str(fit)
+draws<-fit$draws()# force CSVs to be read into R
+str(fit)
For models with many parameters, transformed parameters, or generated quantities, if only some are requested (e.g., by specifying the variables argument to fit$draws()) then CmdStanR will only read in the requested variables (unless they have already been read in).
+
For models with many parameters, transformed parameters, or generated
+quantities, if only some are requested (e.g., by specifying the
+variables argument to fit$draws()) then
+CmdStanR will only read in the requested variables (unless they have
+already been read in).
-
-
-read_cmdstan_csv()
-
Internally, the read_cmdstan_csv() function is used to read the CmdStan CSV files into R. This function is exposed to users, so you can also call it directly.
+
+
read_cmdstan_csv()
+
+
Internally, the read_cmdstan_csv() function is used to
+read the CmdStan CSV files into R. This function is exposed to users, so
+you can also call it directly.
-# see ?read_cmdstan_csv for info on optional arguments controlling
-# what information is read in
-csv_contents<-read_cmdstan_csv(fit$output_files())
-str(csv_contents)
+# see ?read_cmdstan_csv for info on optional arguments controlling
+# what information is read in
+csv_contents<-read_cmdstan_csv(fit$output_files())
+str(csv_contents)
List of 8
$ metadata :List of 40
..$ stan_version_major : num 2
- ..$ stan_version_minor : num 29
- ..$ stan_version_patch : num 1
- ..$ start_datetime : chr "2022-03-18 18:28:03 UTC"
+ ..$ stan_version_minor : num 32
+ ..$ stan_version_patch : num 2
+ ..$ start_datetime : chr "2023-07-25 20:55:39 UTC"
..$ method : chr "sample"
..$ save_warmup : num 0
..$ thin : num 1
@@ -594,14 +703,14 @@
This is pointless in our case since we have the original fit object, but this function can be used to create fitted model objects (CmdStanMCMC, CmdStanMLE, etc.) from any CmdStan CSV files.
This is pointless in our case since we have the original
+fit object, but this function can be used to create fitted
+model objects (CmdStanMCMC, CmdStanMLE, etc.)
+from any CmdStan CSV files.
-
-
-Saving and accessing advanced algorithm info (latent dynamics)
-
If save_latent_dynamics is set to TRUE when running the $sample() method then additional CSV files are created (one per chain) that provide access to quantities used under the hood by Stan’s implementation of dynamic Hamiltonian Monte Carlo.
-
CmdStanR does not yet provide a special method for processing these files but they can be read into R using R’s standard CSV reading functions.
+
+
Saving and accessing advanced algorithm info (latent dynamics)
+
+
If save_latent_dynamics is set to TRUE when
+running the $sample() method then additional CSV files are
+created (one per chain) that provide access to quantities used under the
+hood by Stan’s implementation of dynamic Hamiltonian Monte Carlo.
+
CmdStanR does not yet provide a special method for processing these
+files but they can be read into R using R’s standard CSV reading
+functions.
The column lp__ is also provided via fit$draws(), and the columns accept_stat__, stepsize__, treedepth__, n_leapfrog__, divergent__, and energy__ are also provided by fit$sampler_diagnostics(), but there are several columns unique to the latent dynamics file.
The column lp__ is also provided via
+fit$draws(), and the columns accept_stat__,
+stepsize__, treedepth__,
+n_leapfrog__, divergent__, and
+energy__ are also provided by
+fit$sampler_diagnostics(), but there are several columns
+unique to the latent dynamics file.
Our model has a single parameter theta and the three columns above correspond to theta in the unconstrained space (theta on the constrained space is accessed via fit$draws()), the auxiliary momentum p_theta, and the gradient g_theta. In general, each of these three columns will exist for every parameter in the model.
Our model has a single parameter theta and the three
+columns above correspond to theta in the
+unconstrained space (theta on the constrained
+space is accessed via fit$draws()), the auxiliary momentum
+p_theta, and the gradient g_theta. In general,
+each of these three columns will exist for every parameter in
+the model.
-
-
-Saving fitted model objects
-
As described above, the contents of the CSV files are only read into R when they are needed. This means that in order to save a fitted model object containing all of the posterior draws and sampler diagnostics you should either make sure to call fit$draws() and fit$sampler_diagnostics() before saving the object fit, or use the special $save_object() method provided by CmdStanR, which will ensure that everything has been read into R before saving the object using saveRDS().
+
+
Saving fitted model objects
+
+
As described above, the contents of the CSV files are only read into
+R when they are needed. This means that in order to save a fitted model
+object containing all of the posterior draws and sampler
+diagnostics you should either make sure to call fit$draws()
+and fit$sampler_diagnostics() before saving the object
+fit, or use the special $save_object() method
+provided by CmdStanR, which will ensure that everything has been read
+into R before saving the object using saveRDS().
-temp_rds_file<-tempfile(fileext =".RDS")# temporary file just for demonstration
-fit$save_object(file =temp_rds_file)
-
We can check that this worked by removing fit and loading it back in from the save file.
+temp_rds_file<-tempfile(fileext =".RDS")# temporary file just for demonstration
+fit$save_object(file =temp_rds_file)
+
We can check that this worked by removing fit and
+loading it back in from the save file.
CmdStanR can of course be used for developing other packages that require compiling and running Stan models as well as using new or custom Stan features available through CmdStan.
-
-
-Troubleshooting and debugging
-
When developing or testing new features it might be useful to have more information on how CmdStan is called internally and to see more information printed when compiling or running models. This can be enabled for an entire R session by setting the option "cmdstanr_verbose" to TRUE.
+
+
Developing using CmdStanR
+
+
CmdStanR can of course be used for developing other packages that
+require compiling and running Stan models as well as using new or custom
+Stan features available through CmdStan.
+
+
Troubleshooting and debugging
+
+
When developing or testing new features it might be useful to have
+more information on how CmdStan is called internally and to see more
+information printed when compiling or running models. This can be
+enabled for an entire R session by setting the option
+"cmdstanr_verbose" to TRUE.
CmdStanR is a lightweight interface to Stan for R users (see CmdStanPy for Python) that provides an alternative to the traditional RStan interface. See the Comparison with RStan section later in this vignette for more details on how the two interfaces differ.
-
CmdStanR is not on CRAN yet, but the beta release can be installed by running the following command in R.
+
+
Introduction
+
+
CmdStanR (Command Stan R) is a lightweight interface to Stan for R users that provides an
+alternative to the traditional RStan interface. See the Comparison with RStan section
+later in this vignette for more details on how the two interfaces
+differ.
+
CmdStanR is not on CRAN yet, but the beta release
+can be installed by running the following command in R.
-# we recommend running this is a fresh R session or restarting your current session
-install.packages("cmdstanr", repos =c("https://mc-stan.org/r-packages/", getOption("repos")))
-
CmdStanR (the cmdstanr R package) can now be loaded like any other R package. We’ll also load the bayesplot and posterior packages to use later in examples.
+# we recommend running this in a fresh R session or restarting your current session
+install.packages("cmdstanr", repos =c("https://mc-stan.org/r-packages/", getOption("repos")))
+
CmdStanR (the cmdstanr R package) can now be loaded
+like any other R package. We’ll also load the bayesplot
+and posterior packages to use later in examples.
CmdStanR requires a working installation of CmdStan, the shell interface to Stan. If you don’t have CmdStan installed then CmdStanR can install it for you, assuming you have a suitable C++ toolchain. The requirements are described in the CmdStan Guide:
CmdStanR requires a working installation of CmdStan,
+the shell interface to Stan. If you don’t have CmdStan installed then
+CmdStanR can install it for you, assuming you have a suitable C++
+toolchain. The requirements are described in the CmdStan Guide:
Before CmdStanR can be used it needs to know where the CmdStan installation is located. When the package is loaded it tries to help automate this to avoid having to manually set the path every session:
Before CmdStanR can be used it needs to know where the CmdStan
+installation is located. When the package is loaded it tries to help
+automate this to avoid having to manually set the path every
+session:
-
If the environment variable "CMDSTAN" exists at load time then its value will be automatically set as the default path to CmdStan for the R session. This is useful if your CmdStan installation is not located in the default directory that would have been used by install_cmdstan() (see #2).
-
If no environment variable is found when loaded but any directory in the form ".cmdstan/cmdstan-[version]", for example ".cmdstan/cmdstan-2.23.0", exists in the user’s home directory (Sys.getenv("HOME"), not the current working directory) then the path to the CmdStan with the largest version number will be set as the path to CmdStan for the R session. This is the same as the default directory that install_cmdstan() uses to install the latest version of CmdStan, so if that’s how you installed CmdStan you shouldn’t need to manually set the path to CmdStan when loading CmdStanR.
+
If the environment variable "CMDSTAN" exists at load
+time then its value will be automatically set as the default path to
+CmdStan for the R session. This is useful if your CmdStan installation
+is not located in the default directory that would have been used by
+install_cmdstan() (see #2).
+
If no environment variable is found when loaded but any directory
+in the form ".cmdstan/cmdstan-[version]", for example
+".cmdstan/cmdstan-2.23.0", exists in the user’s home
+directory (Sys.getenv("HOME"), not the current
+working directory) then the path to the CmdStan with the largest version
+number will be set as the path to CmdStan for the R session. This is the
+same as the default directory that install_cmdstan() uses
+to install the latest version of CmdStan, so if that’s how you installed
+CmdStan you shouldn’t need to manually set the path to CmdStan when
+loading CmdStanR.
-
If neither of these applies (or you want to subsequently change the path) you can use the set_cmdstan_path() function:
+
If neither of these applies (or you want to subsequently change the
+path) you can use the set_cmdstan_path() function:
The cmdstan_model() function creates a new CmdStanModel object from a file containing a Stan program. Under the hood, CmdStan is called to translate a Stan program to C++ and create a compiled executable. Here we’ll use the example Stan program that comes with the CmdStan installation:
The cmdstan_model() function creates a new CmdStanModel
+object from a file containing a Stan program. Under the hood, CmdStan is
+called to translate a Stan program to C++ and create a compiled
+executable. Here we’ll use the example Stan program that comes with the
+CmdStan installation:
The object mod is an R6 reference object of class CmdStanModel and behaves similarly to R’s reference class objects and those in object oriented programming languages. Methods are accessed using the $ operator. This design choice allows for CmdStanR and CmdStanPy to provide a similar user experience and share many implementation details.
-
The Stan program can be printed using the $print() method:
The object mod is an R6 reference object of class CmdStanModel
+and behaves similarly to R’s reference class objects and those in object
+oriented programming languages. Methods are accessed using the
+$ operator. This design choice allows for CmdStanR and CmdStanPy to provide a
+similar user experience and share many implementation details.
+
The Stan program can be printed using the $print()
+method:
The $sample() method for CmdStanModel objects runs Stan’s default MCMC algorithm. The data argument accepts a named list of R objects (like for RStan) or a path to a data file compatible with CmdStan (JSON or R dump).
The $sample()
+method for CmdStanModel
+objects runs Stan’s default MCMC algorithm. The data
+argument accepts a named list of R objects (like for RStan) or a path to
+a data file compatible with CmdStan (JSON or R dump).
-# names correspond to the data block in the Stan program
-data_list<-list(N =10, y =c(0,1,0,0,0,0,0,0,0,1))
-
-fit<-mod$sample(
- data =data_list,
- seed =123,
- chains =4,
- parallel_chains =4,
- refresh =500# print update every 500 iters
-)
+# names correspond to the data block in the Stan program
+data_list<-list(N =10, y =c(0,1,0,0,0,0,0,0,0,1))
+
+fit<-mod$sample(
+ data =data_list,
+ seed =123,
+ chains =4,
+ parallel_chains =4,
+ refresh =500# print update every 500 iters
+)
The $sample() method creates R6CmdStanMCMC objects, which have many associated methods. Below we will demonstrate some of the most important methods. For a full list, follow this link to the CmdStanMCMC documentation:
+
The $sample() method creates R6 CmdStanMCMC objects,
+which have many associated methods. Below we will demonstrate some of
+the most important methods. For a full list, follow this link to the
+CmdStanMCMC documentation:
The $summary() method calls summarise_draws() from the posterior package. The first argument specifies the variables to summarize and any arguments after that are passed on to posterior::summarise_draws() to specify which summaries to compute, whether to use multiple cores, etc.
+
+
Posterior summary statistics
+
+
+
Summaries from the posterior package
+
+
The $summary()
+method calls summarise_draws() from the
+posterior package. The first argument specifies the
+variables to summarize and any arguments after that are passed on to
+posterior::summarise_draws() to specify which summaries to
+compute, whether to use multiple cores, etc.
CmdStan itself provides a stansummary utility that can be called using the $cmdstan_summary() method. This method will print summaries but won’t return anything.
-
-
-
-
-Posterior draws
-
-
-Extracting draws
-
The $draws() method can be used to extract the posterior draws in formats provided by the posterior package. Here we demonstrate only the draws_array and draws_df formats, but the posterior package supports other useful formats as well.
-
-# default is a 3-D draws_array object from the posterior package
-# iterations x chains x variables
-draws_arr<-fit$draws()# or format="array"
-str(draws_arr)
+fit$summary()
+fit$summary(variables =c("theta", "lp__"), "mean", "sd")
+
+# use a formula to summarize arbitrary functions, e.g. Pr(theta <= 0.5)
+fit$summary("theta", pr_lt_half =~mean(.<=0.5))
+
+# summarise all variables with default and additional summary measures
+fit$summary(
+ variables =NULL,
+posterior::default_summary_measures(),
+ extra_quantiles =~posterior::quantile2(., probs =c(.0275, .975))
+)
CmdStan itself provides a stansummary utility that can
+be called using the $cmdstan_summary() method. This method
+will print summaries but won’t return anything.
+
+
+
+
Posterior draws
+
+
+
Extracting draws
+
+
The $draws()
+method can be used to extract the posterior draws in formats provided by
+the posterior
+package. Here we demonstrate only the draws_array and
+draws_df formats, but the posterior
+package supports other useful formats as well.
+
+# default is a 3-D draws_array object from the posterior package
+# iterations x chains x variables
+draws_arr<-fit$draws()# or format="array"
+str(draws_arr)
# A draws_df: 1000 iterations, 4 chains, and 2 variables
lp__ theta
1 -6.8 0.28
@@ -347,41 +414,52 @@
10 -7.5 0.42
# ... with 3990 more draws
# ... hidden reserved variables {'.chain', '.iteration', '.draw'}
-
To convert an existing draws object to a different format use the posterior::as_draws_*() functions.
-
-# this should be identical to draws_df created via draws(format = "df")
-draws_df_2<-as_draws_df(draws_arr)
-identical(draws_df, draws_df_2)
+
To convert an existing draws object to a different format use the
+posterior::as_draws_*() functions.
+
+# this should be identical to draws_df created via draws(format = "df")
+draws_df_2<-as_draws_df(draws_arr)
+identical(draws_df, draws_df_2)
[1] TRUE
-
In general, converting to a different draws format in this way will be slower than just setting the appropriate format initially in the call to the $draws() method, but in most cases the speed difference will be minor.
-
-
-
-Plotting draws
-
Plotting posterior distributions is as easy as passing the object returned by the $draws() method directly to plotting functions in our bayesplot package.
In general, converting to a different draws format in this way will
+be slower than just setting the appropriate format initially in the call
+to the $draws() method, but in most cases the speed
+difference will be minor.
+
+
+
Plotting draws
+
+
Plotting posterior distributions is as easy as passing the object
+returned by the $draws() method directly to plotting
+functions in our bayesplot
+package.
-Extracting diagnostic values for each iteration and chain
-
The $sampler_diagnostics() method extracts the values of the sampler parameters (treedepth__, divergent__, etc.) in formats supported by the posterior package. The default is as a 3-D array (iteration x chain x variable).
-
-# this is a draws_array object from the posterior package
-str(fit$sampler_diagnostics())
+
+
Sampler diagnostics
+
+
+
Extracting diagnostic values for each iteration and chain
+
+
The $sampler_diagnostics()
+method extracts the values of the sampler parameters
+(treedepth__, divergent__, etc.) in formats
+supported by the posterior package. The default is as a
+3-D array (iteration x chain x variable).
+
+# this is a draws_array object from the posterior package
+str(fit$sampler_diagnostics())
We see the number of divergences for each of the four chains, the number of times the maximum treedepth was hit for each chain, and the E-BFMI for each chain.
-
In this case there were no warnings, so in order to demonstrate the warning messages we’ll use one of the CmdStanR example models that suffers from divergences.
Warning: 76 of 4000 (2.0%) transitions ended with a divergence.
+[1] 1.0 1.3 1.1 1.2
+
We see the number of divergences for each of the four chains, the
+number of times the maximum treedepth was hit for each chain, and the
+E-BFMI for each chain.
+
In this case there were no warnings, so in order to demonstrate the
+warning messages we’ll use one of the CmdStanR example models that
+suffers from divergences.
-# number of divergences reported in warning is the sum of the per chain values
-sum(diagnostics$num_divergent)
-
[1] 76
-
-
-
-CmdStan’s diagnose utility
-
CmdStan itself provides a diagnose utility that can be called using the $cmdstan_diagnose() method. This method will print warnings but won’t return anything.
-
-
-
-
-Create a stanfit object
-
If you have RStan installed then it is also possible to create a stanfit object from the csv output files written by CmdStan. This can be done by using rstan::read_stan_csv() in combination with the $output_files() method of the CmdStanMCMC object. This is only needed if you want to fit a model with CmdStanR but already have a lot of post-processing code that assumes a stanfit object. Otherwise we recommend using the post-processing functionality provided by CmdStanR itself.
+[1] 0.29 0.24 0.26 0.30
+
+# number of divergences reported in warning is the sum of the per chain values
+sum(diagnostics$num_divergent)
+
[1] 185
+
+
+
CmdStan’s diagnose utility
+
+
CmdStan itself provides a diagnose utility that can be
+called using the $cmdstan_diagnose() method. This method
+will print warnings but won’t return anything.
+
+
+
+
Create a stanfit object
+
+
If you have RStan installed then it is also possible to create a
+stanfit object from the csv output files written by
+CmdStan. This can be done by using rstan::read_stan_csv()
+in combination with the $output_files() method of the
+CmdStanMCMC object. This is only needed if you want to fit
+a model with CmdStanR but already have a lot of post-processing code
+that assumes a stanfit object. Otherwise we recommend using
+the post-processing functionality provided by CmdStanR itself.
CmdStanR also supports running Stan’s optimization algorithms and its
+algorithms for variational approximation of full Bayesian inference.
+These are run via the $optimize() and
+$variational() methods, which are called in a similar way
+to the $sample() method demonstrated above.
+
+
Optimization
+
+
We can find the (penalized) maximum likelihood estimate (MLE) using
+$optimize().
CmdStanR also supports running Stan’s optimization algorithms and its algorithms for variational approximation of full Bayesian inference. These are run via the $optimize() and $variational() methods, which are called in a similar way to the $sample() method demonstrated above.
-
-
-Optimization
-
We can find the (penalized) maximum likelihood estimate (MLE) using $optimize().
------------------------------------------------------------
EXPERIMENTAL ALGORITHM:
This procedure has not been thoroughly tested and may be unstable
or buggy. The interface is subject to change.
------------------------------------------------------------
-Gradient evaluation took 4e-06 seconds
-1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds.
+Gradient evaluation took 6e-06 seconds
+1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds.
Adjust your expectations accordingly!
Begin eta adaptation.
Iteration: 1 / 250 [ 0%] (Adaptation)
@@ -509,92 +609,115 @@
Drawing a sample of size 4000 from the approximate posterior...
COMPLETED.
Finished in 0.1 seconds.
-
-fit_vb$summary("theta")
-
# A tibble: 1 × 7
- variable mean median sd mad q5 q95
- <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
-1 theta 0.267 0.250 0.117 0.117 0.105 0.487
-
The $draws() method can be used to access the approximate posterior draws. Let’s extract the draws, make the same plot we made after MCMC, and compare the two. In this trivial example the distributions look quite similar, although the variational approximation slightly underestimates the posterior standard deviation.
variable mean median sd mad q5 q95
+1 theta 0.27 0.25 0.12 0.12 0.1 0.49
+
The $draws() method can be used to access the
+approximate posterior draws. Let’s extract the draws, make the same plot
+we made after MCMC, and compare the two. In this trivial example the
+distributions look quite similar, although the variational approximation
+slightly underestimates the posterior standard deviation.
In order to save a fitted model object to disk and ensure that all of the contents are available when reading the object back into R, we recommend using the $save_object() method provided by CmdStanR. The reason for this is discussed in detail in the vignette How does CmdStanR work?, so here we just demonstrate how to use the method.
-
-fit$save_object(file ="fit.RDS")
-
-# can be read back in using readRDS
-fit2<-readRDS("fit.RDS")
-
-
-
-Comparison with RStan
-
-
-Different ways of interfacing with Stan’s C++
-
The RStan interface (rstan package) is an in-memory interface to Stan and relies on R packages like Rcpp and inline to call C++ code from R. On the other hand, the CmdStanR interface does not directly call any C++ code from R, instead relying on the CmdStan interface behind the scenes for compilation, running algorithms, and writing results to output files.
-
-
-
-Advantages of RStan
-
-
Advanced features. We are working on making these available outside of RStan but currently they are only available to R users via RStan:
+
+
Saving fitted model objects
+
+
In order to save a fitted model object to disk and ensure that all of
+the contents are available when reading the object back into R, we
+recommend using the $save_object()
+method provided by CmdStanR. The reason for this is discussed in detail
+in the vignette How
+does CmdStanR work?, so here we just demonstrate how to use the
+method.
+
+fit$save_object(file = "fit.RDS")
+
+# can be read back in using readRDS
+fit2 <- readRDS("fit.RDS")
+
+
+
Comparison with RStan
+
+
+
Different ways of interfacing with Stan’s C++
+
+
The RStan interface (rstan package) is
+an in-memory interface to Stan and relies on R packages like
+Rcpp and inline to call C++ code from
+R. On the other hand, the CmdStanR interface does not directly call any
+C++ code from R, instead relying on the CmdStan interface behind the
+scenes for compilation, running algorithms, and writing results to
+output files.
Allows other developers to distribute R packages with pre-compiled Stan programs (like rstanarm) on CRAN.
+
Allows other developers to distribute R packages with
+pre-compiled Stan programs (like rstanarm) on
+CRAN.
+
Avoids use of R6 classes, which may result in more familiar
+syntax for many R users.
-
-
-Advantages of CmdStanR
+
+
Advantages of CmdStanR
+
-
Compatible with latest versions of Stan. Keeping up with Stan releases is complicated for RStan, often requiring non-trivial changes to the rstan package and new CRAN releases of both rstan and StanHeaders. With CmdStanR the latest improvements in Stan will be available from R immediately after updating CmdStan using cmdstanr::install_cmdstan().
-
Fewer installation issues (e.g., no need to mess with Makevars files).
-
Running Stan via external processes results in fewer unexpected crashes, especially in RStudio.
+
Compatible with latest versions of Stan. Keeping up with Stan
+releases is complicated for RStan, often requiring non-trivial changes
+to the rstan package and new CRAN releases of both
+rstan and StanHeaders. With CmdStanR
+the latest improvements in Stan will be available from R immediately
+after updating CmdStan using
+cmdstanr::install_cmdstan().
+
Running Stan via external processes results in fewer unexpected
+crashes, especially in RStudio.
Less memory overhead.
-
More permissive license. RStan uses the GPL-3 license while the license for CmdStanR is BSD-3, which is a bit more permissive and is the same license used for CmdStan and the Stan C++ source code.
+
More permissive license. RStan uses the GPL-3 license while the
+license for CmdStanR is BSD-3, which is a bit more permissive and is the
+same license used for CmdStan and the Stan C++ source code.
-
-
-Additional resources
-
There are additional vignettes available that discuss other aspects of using CmdStanR. These can be found online at the CmdStanR website:
+
+
Additional resources
+
+
There are additional vignettes available that discuss other aspects
+of using CmdStanR. These can be found online at the CmdStanR
+website:
To report a bug, suggest a feature (including additions to these vignettes), or to start contributing to CmdStanR development (new contributors welcome!) please open an issue on GitHub:
+
To report a bug, suggest a feature (including additions to these
+vignettes), or to start contributing to CmdStanR development (new
+contributors welcome!) please open an issue on GitHub: