diff --git a/.Rbuildignore b/.Rbuildignore index 691c7f148..8c1f77aae 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,8 +7,8 @@ ^docs$ ^pkgdown$ ^man-roxygen$ -^dev-helpers\.R$ ^LICENSE\.md$ ^\.appveyor\.yml$ ^\.github$ ^vignettes/articles-online-only$ +^release-prep\.R$ diff --git a/.gitignore b/.gitignore index 8dc61d80a..db3f1c32c 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,6 @@ .Ruserdata .vscode/* -design-questions/* inst/doc dev-helpers.R +release-prep.R diff --git a/DESCRIPTION b/DESCRIPTION index 46a5814db..b656d0d98 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: cmdstanr Title: R Interface to 'CmdStan' -Version: 0.5.3 -Date: 2022-04-24 +Version: 0.6.0 +Date: 2023-07-25 Authors@R: c(person(given = "Jonah", family = "Gabry", role = c("aut", "cre"), email = "jsg2201@columbia.edu"), diff --git a/NEWS.md b/NEWS.md index 495c4d477..863988233 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,37 @@ +# cmdstanr 0.6.0 + +### Major new features + +* New `expose_functions()` method to expose Stan functions to R by @andrjohns in #702. See `?expose_functions`. +* New methods for accessing log_prob, grad_log_prob, hessian, un/constrain variables by @andrjohns in #701. See `?init_model_methods`. 
+ +### Other changes + +* mod$variables works with includes in precompile state (fix #680) by @MKyhos in #682 +* Update broken link for Stan OpenCL support page by @erictleung in #686 +* Add newline to check syntax output by @rok-cesnovar in #689 +* Allow exposing functions without sampling by @andrjohns in #705 +* Expose skeleton by @andrjohns in #706 +* WSL - Run cmdstan and models under WSL filesystem by @andrjohns in #696 +* Bugfix - Deep copy method/function environments by @andrjohns in #709 +* Add option for including jacobian adjustments in hessian method by @andrjohns in #710 +* WSL Optimisations and Bugfixes for CI by @andrjohns in #711 +* add stancflags from make/local by @rok-cesnovar in #690 +* Update co-authors by @andrjohns in #715 +* Update model methods parameter naming and extract skeleton function by @andrjohns in #724 +* Add method for unconstraining all parameter draws by @andrjohns in #729 +* Improve efficiency of variable matching by @sbfnk in #736 +* Add verbosity to download output and errors by @andrjohns in #745 +* Update handling of show_messages, add show_exceptions by @andrjohns in #746 +* Rtools43 support by @andrjohns in #755 +* Add stanc M1 make patch, suppress boost warnings by @andrjohns in #756 +* more examples of summary method by @gravesti in #751 +* Fix model$format and model$check_syntax for compiled models with include-paths by @adrian-lison in #775 +* Generalise RTools config/support by @andrjohns in #777 +* New posterior vignette by @gravesti in #719 +* Add moment-matching support to $loo() method by @andrjohns in #778 +* replace \ with function by @jsocolar in #789 + # cmdstanr 0.5.3 ### New features diff --git a/R/csv.R b/R/csv.R index 9ab528e1f..5d3824428 100644 --- a/R/csv.R +++ b/R/csv.R @@ -422,7 +422,7 @@ read_cmdstan_csv <- function(files, #' Read CmdStan CSV files from sampling into \R #' #' Deprecated. Use [read_cmdstan_csv()] instead. 
-#' +#' @keywords internal #' @export #' @param files,variables,sampler_diagnostics Deprecated. Use #' [read_cmdstan_csv()] instead. diff --git a/R/example.R b/R/example.R index 388c555ef..755f33703 100644 --- a/R/example.R +++ b/R/example.R @@ -188,7 +188,7 @@ write_stan_file <- function(code, #' Write Stan code to a temporary file #' #' This function is deprecated. Please use [write_stan_file()] instead. -#' +#' @keywords internal #' @export #' @inheritParams write_stan_file write_stan_tempfile <- function(code, dir = tempdir()) { diff --git a/R/fit.R b/R/fit.R index 0bffa1cc5..f9dd4d8d2 100644 --- a/R/fit.R +++ b/R/fit.R @@ -82,7 +82,7 @@ CmdStanFit <- R6::R6Class( invisible(self) }, expose_functions = function(global = FALSE, verbose = FALSE) { - expose_functions(self$functions, global, verbose) + expose_stan_functions(self$functions, global, verbose) invisible(NULL) } ), @@ -301,12 +301,18 @@ init <- function() { CmdStanFit$set("public", name = "init", value = init) #' Compile additional methods for accessing the model log-probability function -#' and parameter constraining and unconstraining. This requires the `Rcpp` package. +#' and parameter constraining and unconstraining. #' #' @name fit-method-init_model_methods #' @aliases init_model_methods -#' @description The `$init_model_methods()` compiles and initializes the -#' `log_prob`, `grad_log_prob`, `constrain_variables`, and `unconstrain_variables` functions. +#' +#' @description The `$init_model_methods()` method compiles and initializes the +#' `log_prob`, `grad_log_prob`, `constrain_variables`, `unconstrain_variables` +#' and `unconstrain_draws` functions. These are then available as methods of +#' the fitted model object. This requires the `Rcpp` package. +#' +#' Note: there may be many compiler warnings emitted during compilation but +#' these can be ignored so long as they are warnings and not errors. #' #' @param seed (integer) The random seed to use when initializing the model. 
#' @param verbose (boolean) Whether to show verbose logging during compilation. @@ -317,6 +323,9 @@ CmdStanFit$set("public", name = "init", value = init) #' fit_mcmc <- cmdstanr_example("logistic", method = "sample") #' fit_mcmc$init_model_methods() #' } +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] #' init_model_methods <- function(seed = 0, verbose = FALSE, hessian = FALSE) { if (os_is_wsl()) { @@ -358,9 +367,13 @@ CmdStanFit$set("public", name = "init_model_methods", value = init_model_methods #' \dontrun{ #' fit_mcmc <- cmdstanr_example("logistic", method = "sample") #' fit_mcmc$init_model_methods() -#' fit_mcmc$log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +#' fit_mcmc$log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) #' } #' +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] +#' log_prob <- function(unconstrained_variables, jacobian_adjustment = TRUE) { if (is.null(private$model_methods_env_$model_ptr)) { stop("The method has not been compiled, please call `init_model_methods()` first", @@ -383,18 +396,22 @@ CmdStanFit$set("public", name = "log_prob", value = log_prob) #' @description The `$grad_log_prob()` method provides access to the #' Stan model's `log_prob` function and its derivative #' -#' @param unconstrained_variables (numeric) A vector of unconstrained parameters to be passed -#' to `grad_log_prob` -#' @param jacobian_adjustment (bool) Whether to include the log-density adjustments from -#' un/constraining variables +#' @param unconstrained_variables (numeric) A vector of unconstrained parameters +#' to be passed to `grad_log_prob`. +#' @param jacobian_adjustment (bool) Whether to include the log-density +#' adjustments from un/constraining variables. 
#' #' @examples #' \dontrun{ #' fit_mcmc <- cmdstanr_example("logistic", method = "sample") #' fit_mcmc$init_model_methods() -#' fit_mcmc$grad_log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +#' fit_mcmc$grad_log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) #' } #' +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] +#' grad_log_prob <- function(unconstrained_variables, jacobian_adjustment = TRUE) { if (is.null(private$model_methods_env_$model_ptr)) { stop("The method has not been compiled, please call `init_model_methods()` first", @@ -417,18 +434,22 @@ CmdStanFit$set("public", name = "grad_log_prob", value = grad_log_prob) #' @description The `$hessian()` method provides access to the #' Stan model's `log_prob`, its derivative, and its hessian #' -#' @param unconstrained_variables (numeric) A vector of unconstrained parameters to be passed -#' to `hessian` -#' @param jacobian_adjustment (bool) Whether to include the log-density adjustments from -#' un/constraining variables +#' @param unconstrained_variables (numeric) A vector of unconstrained parameters +#' to be passed to `hessian`. +#' @param jacobian_adjustment (bool) Whether to include the log-density +#' adjustments from un/constraining variables. 
#' #' @examples #' \dontrun{ -#' fit_mcmc <- cmdstanr_example("logistic", method = "sample") -#' fit_mcmc$init_model_methods() -#' fit_mcmc$hessian(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +#' # fit_mcmc <- cmdstanr_example("logistic", method = "sample") +#' # fit_mcmc$init_model_methods(hessian = TRUE) +#' # fit_mcmc$hessian(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) #' } #' +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] +#' hessian <- function(unconstrained_variables, jacobian_adjustment = TRUE) { if (is.null(private$model_methods_env_$model_ptr)) { stop("The method has not been compiled, please call `init_model_methods()` first", @@ -450,8 +471,8 @@ CmdStanFit$set("public", name = "hessian", value = hessian) #' @description The `$unconstrain_variables()` method transforms input parameters to #' the unconstrained scale #' -#' @param variables (list) A list of parameter values to transform, in the same format as -#' provided to the `init` argument of the `$sample()` method +#' @param variables (list) A list of parameter values to transform, in the same +#' format as provided to the `init` argument of the `$sample()` method. 
#' #' @examples #' \dontrun{ @@ -460,6 +481,10 @@ CmdStanFit$set("public", name = "hessian", value = hessian) #' fit_mcmc$unconstrain_variables(list(alpha = 0.5, beta = c(0.7, 1.1, 0.2))) #' } #' +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] +#' unconstrain_variables <- function(variables) { if (is.null(private$model_methods_env_$model_ptr)) { stop("The method has not been compiled, please call `init_model_methods()` first", @@ -521,6 +546,10 @@ CmdStanFit$set("public", name = "unconstrain_variables", value = unconstrain_var #' unconstrained_draws <- fit_mcmc$unconstrain_draws(draws = fit_mcmc$draws()) #' } #' +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] +#' unconstrain_draws <- function(files = NULL, draws = NULL) { if (!is.null(files) || !is.null(draws)) { if (!is.null(files) && !is.null(draws)) { @@ -565,6 +594,7 @@ unconstrain_draws <- function(files = NULL, draws = NULL) { self$unconstrain_variables(variables = par_list) }) }) + unconstrained } CmdStanFit$set("public", name = "unconstrain_draws", value = unconstrain_draws) @@ -587,6 +617,10 @@ CmdStanFit$set("public", name = "unconstrain_draws", value = unconstrain_draws) #' fit_mcmc$variable_skeleton() #' } #' +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] +#' variable_skeleton <- function(transformed_parameters = TRUE, generated_quantities = TRUE) { if (is.null(private$model_methods_env_$model_ptr)) { stop("The method has not been compiled, please call `init_model_methods()` first", @@ -607,19 +641,24 @@ CmdStanFit$set("public", name = "variable_skeleton", value = variable_skeleton) #' @description The `$constrain_variables()` method transforms input parameters 
to #' the constrained scale #' -#' @param unconstrained_variables (numeric) A vector of unconstrained parameters to constrain -#' @param transformed_parameters (boolean) Whether to return transformed parameters -#' implied by newly-constrained parameters (defaults to TRUE) +#' @param unconstrained_variables (numeric) A vector of unconstrained parameters +#' to constrain. +#' @param transformed_parameters (boolean) Whether to return transformed +#' parameters implied by newly-constrained parameters (defaults to TRUE). #' @param generated_quantities (boolean) Whether to return generated quantities -#' implied by newly-constrained parameters (defaults to TRUE) +#' implied by newly-constrained parameters (defaults to TRUE). #' #' @examples #' \dontrun{ #' fit_mcmc <- cmdstanr_example("logistic", method = "sample") #' fit_mcmc$init_model_methods() -#' fit_mcmc$constrain_variables(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +#' fit_mcmc$constrain_variables(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) #' } #' +#' @seealso [log_prob()], [grad_log_prob()], [constrain_variables()], +#' [unconstrain_variables()], [unconstrain_draws()], [variable_skeleton()], +#' [hessian()] +#' constrain_variables <- function(unconstrained_variables, transformed_parameters = TRUE, generated_quantities = TRUE) { if (is.null(private$model_methods_env_$model_ptr)) { @@ -1233,6 +1272,20 @@ CmdStanFit$set("public", name = "code", value = code) #' [`$time()`][fit-method-time] | Report total and chain-specific run times. | #' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. | #' +#' ## Expose Stan functions and additional methods to R +#' +#' |**Method**|**Description**| +#' |:----------|:---------------| +#' [`$expose_functions()`][fit-method-expose_functions] | Expose Stan functions for use in R. 
| +#' [`$init_model_methods()`][fit-method-init_model_methods] | Expose methods for log-probability, gradients, parameter constraining and unconstraining. | +#' [`$log_prob()`][fit-method-log_prob] | Calculate log-prob. | +#' [`$grad_log_prob()`][fit-method-grad_log_prob] | Calculate log-prob and gradient. | +#' [`$hessian()`][fit-method-hessian] | Calculate log-prob, gradient, and hessian. | +#' [`$constrain_variables()`][fit-method-constrain_variables] | Transform a set of unconstrained parameter values to the constrained scale. | +#' [`$unconstrain_variables()`][fit-method-unconstrain_variables] | Transform a set of parameter values to the unconstrained scale. | +#' [`$unconstrain_draws()`][fit-method-unconstrain_draws] | Transform all parameter draws to the unconstrained scale. | +#' [`$variable_skeleton()`][fit-method-variable_skeleton] | Helper function to re-structure a vector of constrained parameter values. | +#' CmdStanMCMC <- R6::R6Class( classname = "CmdStanMCMC", inherit = CmdStanFit, @@ -1718,6 +1771,20 @@ CmdStanMCMC$set("public", name = "num_chains", value = num_chains) #' [`$output()`][fit-method-output] | Pretty print the output that was printed to the console. | #' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. | #' +#' ## Expose Stan functions and additional methods to R +#' +#' |**Method**|**Description**| +#' |:----------|:---------------| +#' [`$expose_functions()`][fit-method-expose_functions] | Expose Stan functions for use in R. | +#' [`$init_model_methods()`][fit-method-init_model_methods] | Expose methods for log-probability, gradients, parameter constraining and unconstraining. | +#' [`$log_prob()`][fit-method-log_prob] | Calculate log-prob. | +#' [`$grad_log_prob()`][fit-method-grad_log_prob] | Calculate log-prob and gradient. | +#' [`$hessian()`][fit-method-hessian] | Calculate log-prob, gradient, and hessian. 
| +#' [`$constrain_variables()`][fit-method-constrain_variables] | Transform a set of unconstrained parameter values to the constrained scale. | +#' [`$unconstrain_variables()`][fit-method-unconstrain_variables] | Transform a set of parameter values to the unconstrained scale. | +#' [`$unconstrain_draws()`][fit-method-unconstrain_draws] | Transform all parameter draws to the unconstrained scale. | +#' [`$variable_skeleton()`][fit-method-variable_skeleton] | Helper function to re-structure a vector of constrained parameter values. | +#' CmdStanMLE <- R6::R6Class( classname = "CmdStanMLE", inherit = CmdStanFit, @@ -1821,6 +1888,20 @@ CmdStanMLE$set("public", name = "mle", value = mle) #' [`$output()`][fit-method-output] | Pretty print the output that was printed to the console. | #' [`$return_codes()`][fit-method-return_codes] | Return the return codes from the CmdStan runs. | #' +#' ## Expose Stan functions and additional methods to R +#' +#' |**Method**|**Description**| +#' |:----------|:---------------| +#' [`$expose_functions()`][fit-method-expose_functions] | Expose Stan functions for use in R. | +#' [`$init_model_methods()`][fit-method-init_model_methods] | Expose methods for log-probability, gradients, parameter constraining and unconstraining. | +#' [`$log_prob()`][fit-method-log_prob] | Calculate log-prob. | +#' [`$grad_log_prob()`][fit-method-grad_log_prob] | Calculate log-prob and gradient. | +#' [`$hessian()`][fit-method-hessian] | Calculate log-prob, gradient, and hessian. | +#' [`$constrain_variables()`][fit-method-constrain_variables] | Transform a set of unconstrained parameter values to the constrained scale. | +#' [`$unconstrain_variables()`][fit-method-unconstrain_variables] | Transform a set of parameter values to the unconstrained scale. | +#' [`$unconstrain_draws()`][fit-method-unconstrain_draws] | Transform all parameter draws to the unconstrained scale. 
| +#' [`$variable_skeleton()`][fit-method-variable_skeleton] | Helper function to re-structure a vector of constrained parameter values. | +#' CmdStanVB <- R6::R6Class( classname = "CmdStanVB", inherit = CmdStanFit, diff --git a/R/model.R b/R/model.R index e95c13759..f32f7e424 100644 --- a/R/model.R +++ b/R/model.R @@ -188,6 +188,7 @@ cmdstan_model <- function(stan_file = NULL, exe_file = NULL, compile = TRUE, ... #' [`$exe_file()`][model-method-compile] | Return the file path to the compiled executable. | #' [`$hpp_file()`][model-method-compile] | Return the file path to the `.hpp` file containing the generated C++ code. | #' [`$save_hpp_file()`][model-method-compile] | Save the `.hpp` file containing the generated C++ code. | +#' [`$expose_functions()`][model-method-expose_functions] | Expose Stan functions for use in R. | #' #' ## Model fitting #' @@ -325,10 +326,6 @@ CmdStanModel <- R6::R6Class( "- ", new_hpp_loc) private$hpp_file_ <- new_hpp_loc invisible(private$hpp_file_) - }, - expose_functions = function(global = FALSE, verbose = FALSE) { - expose_functions(self$functions, global, verbose) - invisible(NULL) } ) ) @@ -391,10 +388,16 @@ CmdStanModel <- R6::R6Class( #' not modified since last compiled. The default is `FALSE`. Can also be set #' via a global `cmdstanr_force_recompile` option. #' @param compile_model_methods (logical) Compile additional model methods -#' (`log_prob()`, `grad_log_prob()`, `constrain_pars()`, `unconstrain_pars()`) +#' (`log_prob()`, `grad_log_prob()`, `constrain_variables()`, +#' `unconstrain_variables()`). #' @param compile_hessian_method (logical) Should the (experimental) `hessian()` method be #' be compiled with the model methods? -#' @param compile_standalone (logical) Should functions in the Stan model be compiled for used in R? +#' @param compile_standalone (logical) Should functions in the Stan model be +#' compiled for use in R? 
If `TRUE` the functions will be available via the +#' `functions` field in the compiled model object. This can also be done after +#' compilation using the +#' [`$expose_functions()`][model-method-expose_functions] method. +#' #' @param threads Deprecated and will be removed in a future release. Please #' turn on threading via `cpp_options = list(stan_threads = TRUE)` instead. #' @@ -584,7 +587,7 @@ compile <- function(quiet = TRUE, self$functions$hpp_code <- get_standalone_hpp(temp_stan_file, stancflags_standalone) self$functions$external <- !is.null(user_header) if (compile_standalone) { - expose_functions(self$functions, !quiet) + expose_stan_functions(self$functions, !quiet) } stancflags_val <- paste0("STANCFLAGS += ", stancflags_val, paste0(" ", stancflags_combined, collapse = " ")) withr::with_path( @@ -1749,6 +1752,63 @@ diagnose <- function(data = NULL, } CmdStanModel$set("public", name = "diagnose", value = diagnose) +#' Expose Stan functions to R +#' +#' @name model-method-expose_functions +#' @aliases expose_functions fit-method-expose_functions +#' @family CmdStanModel methods +#' +#' @description The `$expose_functions()` method of a [`CmdStanModel`] object +#' will compile the functions in the Stan program's `functions` block and +#' expose them for use in \R. This can also be specified via the +#' `compile_standalone` argument to the [`$compile()`][model-method-compile] +#' method. +#' +#' This method is also available for fitted model objects ([`CmdStanMCMC`], [`CmdStanVB`], etc.). +#' See **Examples**. +#' +#' Note: there may be many compiler warnings emitted during compilation but +#' these can be ignored so long as they are warnings and not errors. +#' +#' @param global (logical) Should the functions be added to the Global +#' Environment? The default is `FALSE`, in which case the functions are +#' available via the `functions` field of the R6 object. 
+#' @param verbose (logical) Should detailed information about generated code be +#' printed to the console? Defaults to `FALSE`. +#' @template seealso-docs +#' @examples +#' \dontrun{ +#' stan_file <- write_stan_file( +#' " +#' functions { +#' real a_plus_b(real a, real b) { +#' return a + b; +#' } +#' } +#' parameters { +#' real x; +#' } +#' model { +#' x ~ std_normal(); +#' } +#' " +#' ) +#' mod <- cmdstan_model(stan_file) +#' mod$expose_functions() +#' mod$functions$a_plus_b(1, 2) +#' +#' fit <- mod$sample(refresh = 0) +#' fit$expose_functions() # already compiled because of above but this would compile them otherwise +#' fit$functions$a_plus_b(1, 2) +#' } +#' +#' +expose_functions <- function(global = FALSE, verbose = FALSE) { + expose_stan_functions(self$functions, global, verbose) + invisible(NULL) +} +CmdStanModel$set("public", name = "expose_functions", value = expose_functions) + # internal ---------------------------------------------------------------- diff --git a/R/utils.R b/R/utils.R index 3eef2590d..7e732c0a5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -235,7 +235,7 @@ generate_file_names <- #' Set or get the number of threads used to execute Stan models #' #' DEPRECATED. Please use the `threads_per_chain` argument when fitting the model. -#' +#' @keywords internal #' @name stan_threads NULL @@ -907,7 +907,7 @@ compile_functions <- function(env, verbose = FALSE, global = FALSE) { invisible(NULL) } -expose_functions <- function(function_env, global = FALSE, verbose = FALSE) { +expose_stan_functions <- function(function_env, global = FALSE, verbose = FALSE) { if (os_is_wsl()) { stop("Standalone functions are not currently available with ", "WSL CmdStan and will not be compiled", diff --git a/_pkgdown.yml b/_pkgdown.yml index 2f99caefa..ca5375be8 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -70,6 +70,7 @@ articles: and using CmdStanR in R Markdown documents. 
contents: - cmdstanr-internals + - posterior - r-markdown - deprecations - profiling @@ -110,6 +111,8 @@ reference: - write_stan_json - write_stan_file - draws_to_csv + - as_mcmc.list + - as_draws.CmdStanMCMC - title: "Using CmdStanR with knitr and R Markdown" contents: - register_knitr_engine diff --git a/docs/404.html b/docs/404.html index 0b36386a3..ced1d14e2 100644 --- a/docs/404.html +++ b/docs/404.html @@ -1,74 +1,34 @@ - - - - + + + + - Page not found (404) • cmdstanr - - - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + - - - - -
-
- + +
+ + + - - -
+
+
-
+ + - - diff --git a/docs/LICENSE-text.html b/docs/LICENSE-text.html index 697b1bfa8..5ed5129d6 100644 --- a/docs/LICENSE-text.html +++ b/docs/LICENSE-text.html @@ -1,74 +1,12 @@ - - - - - - - -License • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -License • cmdstanr - - + + - - -
-
- -
- -
+
+
-
- +
- - + + diff --git a/docs/LICENSE.html b/docs/LICENSE.html index 21ed26a88..2dcbb7c1a 100644 --- a/docs/LICENSE.html +++ b/docs/LICENSE.html @@ -1,74 +1,12 @@ - - - - - - - -BSD 3-Clause License • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -BSD 3-Clause License • cmdstanr - - + + - - -
-
- -
- -
+
+
-
- +
- - + + diff --git a/docs/articles/articles-online-only/opencl.html b/docs/articles/articles-online-only/opencl.html index 98da606fa..c2af01ec9 100644 --- a/docs/articles/articles-online-only/opencl.html +++ b/docs/articles/articles-online-only/opencl.html @@ -38,7 +38,7 @@ cmdstanr - 0.5.0 + 0.6.0
@@ -60,7 +60,7 @@ News
  • - Stan + Stan
  • [1] "J"     "sigma" "y"    
    -names(variables$parameters)
    +names(variables$parameters)
    [1] "mu"        "tau"       "theta_raw"
    -names(variables$transformed_parameters)
    +names(variables$transformed_parameters)
    [1] "theta"
    -names(variables$generated_quantities)
    -
    character(0)
    -

    Each variable is represented as a list containing the type information (currently limited to real or int) and the number of dimensions.

    +names(variables$generated_quantities) +
    character(0)
    +

    Each variable is represented as a list containing the type +information (currently limited to real or int) +and the number of dimensions.

    -variables$data$J
    +variables$data$J
    $type
     [1] "int"
     
     $dimensions
     [1] 0
    -variables$data$sigma
    +variables$data$sigma
    $type
     [1] "real"
     
     $dimensions
     [1] 1
    -variables$parameters$tau
    +variables$parameters$tau
    $type
     [1] "real"
     
     $dimensions
     [1] 0
    -variables$transformed_parameters$theta
    +variables$transformed_parameters$theta
    $type
     [1] "real"
     
     $dimensions
     [1] 1
    -
    -

    -Executable location

    -

    By default, the executable is created in the same directory as the file containing the Stan program. You can also specify a different location with the dir argument.

    +
    +

    Executable location +

    +

    By default, the executable is created in the same directory as the +file containing the Stan program. You can also specify a different +location with the dir argument.

    -mod <- cmdstan_model(stan_file, dir = "path/to/directory/for/executable")
    +mod <- cmdstan_model(stan_file, dir = "path/to/directory/for/executable")
    -
    -

    -Processing data

    -

    There are three data formats that CmdStanR allows when fitting a model:

    +
    +

    Processing data +

    +

    There are three data formats that CmdStanR allows when fitting a +model:

    -
    -

    -Named list of R objects

    -

    Like the RStan interface, CmdStanR accepts a named list of R objects where the names correspond to variables declared in the data block of the Stan program. In the Bernoulli model the data is N, the number of data points, and y an integer array of observations.

    +
    +

    Named list of R objects +

    +

    Like the RStan interface, CmdStanR accepts a named list of R objects +where the names correspond to variables declared in the data block of +the Stan program. In the Bernoulli model the data is N, the +number of data points, and y an integer array of +observations.

    -mod$print()
    +mod$print()
    data {
       int<lower=0> N;
    -  array[N] int<lower=0,upper=1> y; // or int<lower=0,upper=1> y[N];
    +  array[N] int<lower=0,upper=1> y;
     }
     parameters {
       real<lower=0,upper=1> theta;
    @@ -383,92 +441,113 @@ 

    y ~ bernoulli(theta); }

    -# data block has 'N' and 'y'
    -data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1))
    -fit <- mod$sample(data = data_list)
    -

    Because CmdStan doesn’t accept lists of R objects, CmdStanR will first write the data to a temporary JSON file using write_stan_json(). This happens internally, but it is also possible to call write_stan_json() directly.

    +# data block has 'N' and 'y' +data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) +fit <- mod$sample(data = data_list)
    +

    Because CmdStan doesn’t accept lists of R objects, CmdStanR will +first write the data to a temporary JSON file using +write_stan_json(). This happens internally, but it is also +possible to call write_stan_json() directly.

    -data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1))
    -json_file <- tempfile(fileext = ".json")
    -write_stan_json(data_list, json_file)
    -cat(readLines(json_file), sep = "\n")
    +data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) +json_file <- tempfile(fileext = ".json") +write_stan_json(data_list, json_file) +cat(readLines(json_file), sep = "\n")
    {
       "N": 10,
       "y": [0, 1, 0, 0, 0, 0, 0, 0, 0, 1]
     }
    -
    -

    -JSON file

    -

    If you already have your data in a JSON file you can just pass that file directly to CmdStanR instead of using a list of R objects. For example, we could pass in the JSON file we created above using write_stan_json():

    +
    +

    JSON file +

    +

    If you already have your data in a JSON file you can just pass that +file directly to CmdStanR instead of using a list of R objects. For +example, we could pass in the JSON file we created above using +write_stan_json():

    -fit <- mod$sample(data = json_file)
    +fit <- mod$sample(data = json_file)
    -
    -

    -R dump file

    -

    Finally, it is also possible to use the R dump file format. This is not recommended because CmdStan can process JSON faster than R dump, but CmdStanR allows it because CmdStan will accept files created by rstan::stan_rdump():

    +
    +

    R dump file +

    +

    Finally, it is also possible to use the R dump file format. This is +not recommended because CmdStan can process JSON faster than R +dump, but CmdStanR allows it because CmdStan will accept files created +by rstan::stan_rdump():

    -rdump_file <- tempfile(fileext = ".data.R")
    -rstan::stan_rdump(names(data_list), file = rdump_file, envir = list2env(data_list))
    -cat(readLines(rdump_file), sep = "\n")
    -fit <- mod$sample(data = rdump_file)
    +rdump_file <- tempfile(fileext = ".data.R") +rstan::stan_rdump(names(data_list), file = rdump_file, envir = list2env(data_list)) +cat(readLines(rdump_file), sep = "\n") +fit <- mod$sample(data = rdump_file)
    -
    -

    -Writing CmdStan output to CSV

    -
    -

    -Default temporary files

    +
    +

    Writing CmdStan output to CSV +

    +
    +

    Default temporary files +

    -data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1))
    -fit <- mod$sample(data = data_list)
    -

    When fitting a model, the default behavior is to write the output from CmdStan to CSV files in a temporary directory.

    +data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) +fit <- mod$sample(data = data_list)
    +

    When fitting a model, the default behavior is to write the output +from CmdStan to CSV files in a temporary directory.

    -fit$output_files()
    -
    [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-202203181228-1-9912f0.csv"
    -[2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-202203181228-2-9912f0.csv"
    -[3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-202203181228-3-9912f0.csv"
    -[4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-202203181228-4-9912f0.csv"
    -

    These files will be lost if you end your R session or if you remove the fit object and force (or wait for) garbage collection.

    +fit$output_files()
    +
    [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-202307251455-1-48e885.csv"
    +[2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-202307251455-2-48e885.csv"
    +[3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-202307251455-3-48e885.csv"
    +[4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-202307251455-4-48e885.csv"
    +

    These files will be lost if you end your R session or if you remove +the fit object and force (or wait for) garbage +collection.

    -files <- fit$output_files()
    -file.exists(files)
    +files <- fit$output_files() +file.exists(files)
    [1] TRUE TRUE TRUE TRUE
    -rm(fit)
    -gc()
    -
              used (Mb) gc trigger (Mb) limit (Mb) max used  (Mb)
    -Ncells 1135293 60.7    2208007  118         NA  2208007 118.0
    -Vcells 2043878 15.6    8388608   64      32768  3361903  25.7
    +rm(fit) +gc()
    +
              used (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
    +Ncells 1260687 67.4    2431888 129.9         NA  2431888 129.9
    +Vcells 2216221 17.0    8388608  64.0      32768  3504936  26.8
    -file.exists(files)
    +file.exists(files)
    [1] FALSE FALSE FALSE FALSE
    -
    -

    -Non-temporary files

    -

    To save these files to a non-temporary location there are two options. You can either specify the output_dir argument to mod$sample() or use fit$save_output_files() after fitting the model.

    +
    +

    Non-temporary files +

    +

    To save these files to a non-temporary location there are two +options. You can either specify the output_dir argument to +mod$sample() or use fit$save_output_files() +after fitting the model.

    -# see ?save_output_files for info on optional arguments
    -fit$save_output_files(dir = "path/to/directory")
    +# see ?save_output_files for info on optional arguments +fit$save_output_files(dir = "path/to/directory")
    -fit <- mod$sample(
    -  data = data_list, 
    -  output_dir = "path/to/directory"
    -)
    +fit <- mod$sample( + data = data_list, + output_dir = "path/to/directory" +)
    -
    -

    -Reading CmdStan output into R

    -
    -

    -Lazy CSV reading

    -

    With the exception of some diagnostic information, the CSV files are not read into R until their contents are requested by calling a method that requires them (e.g., fit$draws(), fit$summary(), etc.). If we examine the structure of the fit object, notice how the Private slot draws_ is NULL, indicating that the CSV files haven’t yet been read into R.

    +
    +

    Reading CmdStan output into R +

    +
    +

    Lazy CSV reading +

    +

    With the exception of some diagnostic information, the CSV files are +not read into R until their contents are requested by calling a method +that requires them (e.g., fit$draws(), +fit$summary(), etc.). If we examine the structure of the +fit object, notice how the Private slot +draws_ is NULL, indicating that the CSV files +haven’t yet been read into R.

    -str(fit)
    +str(fit)
    Classes 'CmdStanMCMC', 'CmdStanFit', 'R6' <CmdStanMCMC>
       Inherits from: <CmdStanFit>
       Public:
    @@ -476,14 +555,21 @@ 

    cmdstan_diagnose: function () cmdstan_summary: function (flags = NULL) code: function () + constrain_variables: function (unconstrained_variables, transformed_parameters = TRUE, data_file: function () diagnostic_summary: function (diagnostics = c("divergences", "treedepth", "ebfmi"), draws: function (variables = NULL, inc_warmup = FALSE, format = getOption("cmdstanr_draws_format", + expose_functions: function (global = FALSE, verbose = FALSE) + functions: environment + grad_log_prob: function (unconstrained_variables, jacobian_adjustment = TRUE) + hessian: function (unconstrained_variables, jacobian_adjustment = TRUE) init: function () + init_model_methods: function (seed = 0, verbose = FALSE, hessian = FALSE) initialize: function (runset) inv_metric: function (matrix = TRUE) latent_dynamics_files: function (include_failed = FALSE) - loo: function (variables = "log_lik", r_eff = TRUE, ...) + log_prob: function (unconstrained_variables, jacobian_adjustment = TRUE) + loo: function (variables = "log_lik", r_eff = TRUE, moment_match = FALSE, lp: function () metadata: function () num_chains: function () @@ -503,20 +589,26 @@

    save_profile_files: function (dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) summary: function (variables = NULL, ...) time: function () + unconstrain_draws: function (files = NULL, draws = NULL) + unconstrain_variables: function (variables) + variable_skeleton: function (transformed_parameters = TRUE, generated_quantities = TRUE) Private: draws_: NULL init_: NULL inv_metric_: list metadata_: list + model_methods_env_: environment profiles_: NULL read_csv_: function (variables = NULL, sampler_diagnostics = NULL, format = getOption("cmdstanr_draws_format", - sampler_diagnostics_: 1 2 2 1 2 1 1 2 1 1 1 1 2 2 1 2 1 1 2 1 2 1 1 1 1 2 1 1 ... + sampler_diagnostics_: 1 1 2 1 2 1 1 1 2 1 1 1 1 2 2 1 1 1 1 2 1 1 1 1 2 2 2 2 ... warmup_draws_: NULL warmup_sampler_diagnostics_: NULL

    -

    After we call a method that requires the draws then if we reexamine the structure of the object we will see that the draws_ slot in Private is no longer empty.

    +

    After we call a method that requires the draws, if we reexamine +the structure of the object we will see that the draws_ +slot in Private is no longer empty.

    -draws <- fit$draws() # force CSVs to be read into R
    -str(fit)
    +draws <- fit$draws() # force CSVs to be read into R +str(fit)
    Classes 'CmdStanMCMC', 'CmdStanFit', 'R6' <CmdStanMCMC>
       Inherits from: <CmdStanFit>
       Public:
    @@ -524,14 +616,21 @@ 

    cmdstan_diagnose: function () cmdstan_summary: function (flags = NULL) code: function () + constrain_variables: function (unconstrained_variables, transformed_parameters = TRUE, data_file: function () diagnostic_summary: function (diagnostics = c("divergences", "treedepth", "ebfmi"), draws: function (variables = NULL, inc_warmup = FALSE, format = getOption("cmdstanr_draws_format", + expose_functions: function (global = FALSE, verbose = FALSE) + functions: environment + grad_log_prob: function (unconstrained_variables, jacobian_adjustment = TRUE) + hessian: function (unconstrained_variables, jacobian_adjustment = TRUE) init: function () + init_model_methods: function (seed = 0, verbose = FALSE, hessian = FALSE) initialize: function (runset) inv_metric: function (matrix = TRUE) latent_dynamics_files: function (include_failed = FALSE) - loo: function (variables = "log_lik", r_eff = TRUE, ...) + log_prob: function (unconstrained_variables, jacobian_adjustment = TRUE) + loo: function (variables = "log_lik", r_eff = TRUE, moment_match = FALSE, lp: function () metadata: function () num_chains: function () @@ -551,33 +650,43 @@

    save_profile_files: function (dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) summary: function (variables = NULL, ...) time: function () + unconstrain_draws: function (files = NULL, draws = NULL) + unconstrain_variables: function (variables) + variable_skeleton: function (transformed_parameters = TRUE, generated_quantities = TRUE) Private: - draws_: -7.82818 -7.71698 -7.1161 -7.09583 -7.51485 -8.04188 -7. ... + draws_: -7.16701 -7.08773 -6.93018 -6.84097 -6.84713 -6.90778 -7 ... init_: NULL inv_metric_: list metadata_: list + model_methods_env_: environment profiles_: NULL read_csv_: function (variables = NULL, sampler_diagnostics = NULL, format = getOption("cmdstanr_draws_format", - sampler_diagnostics_: 1 2 2 1 2 1 1 2 1 1 1 1 2 2 1 2 1 1 2 1 2 1 1 1 1 2 1 1 ... + sampler_diagnostics_: 1 1 2 1 2 1 1 1 2 1 1 1 1 2 2 1 1 1 1 2 1 1 1 1 2 2 2 2 ... warmup_draws_: NULL warmup_sampler_diagnostics_: NULL

    -

    For models with many parameters, transformed parameters, or generated quantities, if only some are requested (e.g., by specifying the variables argument to fit$draws()) then CmdStanR will only read in the requested variables (unless they have already been read in).

    +

    For models with many parameters, transformed parameters, or generated +quantities, if only some are requested (e.g., by specifying the +variables argument to fit$draws()) then +CmdStanR will only read in the requested variables (unless they have +already been read in).

    -
    -

    -read_cmdstan_csv()

    -

    Internally, the read_cmdstan_csv() function is used to read the CmdStan CSV files into R. This function is exposed to users, so you can also call it directly.

    +
    +

    read_cmdstan_csv() +

    +

    Internally, the read_cmdstan_csv() function is used to +read the CmdStan CSV files into R. This function is exposed to users, so +you can also call it directly.

    -# see ?read_cmdstan_csv for info on optional arguments controlling 
    -# what information is read in
    -csv_contents <- read_cmdstan_csv(fit$output_files())
    -str(csv_contents)
    +# see ?read_cmdstan_csv for info on optional arguments controlling +# what information is read in +csv_contents <- read_cmdstan_csv(fit$output_files()) +str(csv_contents)
    List of 8
      $ metadata                       :List of 40
       ..$ stan_version_major  : num 2
    -  ..$ stan_version_minor  : num 29
    -  ..$ stan_version_patch  : num 1
    -  ..$ start_datetime      : chr "2022-03-18 18:28:03 UTC"
    +  ..$ stan_version_minor  : num 32
    +  ..$ stan_version_patch  : num 2
    +  ..$ start_datetime      : chr "2023-07-25 20:55:39 UTC"
       ..$ method              : chr "sample"
       ..$ save_warmup         : num 0
       ..$ thin                : num 1
    @@ -594,14 +703,14 @@ 

    ..$ num_chains : num 1 ..$ id : num [1:4] 1 2 3 4 ..$ init : num [1:4] 2 2 2 2 - ..$ seed : num 1.13e+09 + ..$ seed : num 1.01e+09 ..$ refresh : num 100 ..$ sig_figs : num -1 - ..$ profile_file : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-profile-202203181228-1-6c529e.csv" - ..$ stanc_version : chr "stanc3 v2.29.1" + ..$ profile_file : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-profile-202307251455-1-07c6cb.csv" + ..$ stanc_version : chr "stanc3 v2.32.2" ..$ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... ..$ variables : chr [1:2] "lp__" "theta" - ..$ step_size_adaptation: num [1:4] 1.062 0.9 0.966 0.973 + ..$ step_size_adaptation: num [1:4] 0.938 0.853 0.941 0.842 ..$ model_name : chr "bernoulli_model" ..$ adapt_engaged : num 1 ..$ adapt_delta : num 0.8 @@ -612,9 +721,9 @@

    ..$ threads_per_chain : num 1 ..$ time :'data.frame': 4 obs. of 4 variables: .. ..$ chain_id: num [1:4] 1 2 3 4 - .. ..$ warmup : num [1:4] 0.004 0.005 0.005 0.004 - .. ..$ sampling: num [1:4] 0.012 0.013 0.018 0.014 - .. ..$ total : num [1:4] 0.016 0.018 0.023 0.018 + .. ..$ warmup : num [1:4] 0.004 0.004 0.004 0.004 + .. ..$ sampling: num [1:4] 0.012 0.011 0.011 0.012 + .. ..$ total : num [1:4] 0.016 0.015 0.015 0.016 ..$ stan_variable_sizes :List of 2 .. ..$ lp__ : num 1 .. ..$ theta: num 1 @@ -624,143 +733,178 @@

    ..$ total : int NA ..$ chains:'data.frame': 4 obs. of 4 variables: .. ..$ chain_id: num [1:4] 1 2 3 4 - .. ..$ warmup : num [1:4] 0.004 0.005 0.005 0.004 - .. ..$ sampling: num [1:4] 0.012 0.013 0.018 0.014 - .. ..$ total : num [1:4] 0.016 0.018 0.023 0.018 + .. ..$ warmup : num [1:4] 0.004 0.004 0.004 0.004 + .. ..$ sampling: num [1:4] 0.012 0.011 0.011 0.012 + .. ..$ total : num [1:4] 0.016 0.015 0.015 0.016 $ inv_metric :List of 4 - ..$ 1: num 0.532 - ..$ 2: num 0.497 - ..$ 3: num 0.514 - ..$ 4: num 0.539 + ..$ 1: num 0.491 + ..$ 2: num 0.494 + ..$ 3: num 0.449 + ..$ 4: num 0.511 $ step_size :List of 4 - ..$ 1: num 1.06 - ..$ 2: num 0.9 - ..$ 3: num 0.966 - ..$ 4: num 0.973 + ..$ 1: num 0.938 + ..$ 2: num 0.853 + ..$ 3: num 0.941 + ..$ 4: num 0.842 $ warmup_draws : NULL - $ post_warmup_draws : 'draws_array' num [1:1000, 1:4, 1:2] -7.83 -7.72 -7.12 -7.1 -7.51 ... + $ post_warmup_draws : 'draws_array' num [1:1000, 1:4, 1:2] -7.17 -7.09 -6.93 -6.84 -6.85 ... ..- attr(*, "dimnames")=List of 3 .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... .. ..$ chain : chr [1:4] "1" "2" "3" "4" .. ..$ variable : chr [1:2] "lp__" "theta" $ warmup_sampler_diagnostics : NULL - $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 1 1 1 1 0.888 ... + $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 0.86 1 0.94 1 0.999 ... ..- attr(*, "dimnames")=List of 3 .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... .. ..$ chain : chr [1:4] "1" "2" "3" "4" .. ..$ variable : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ...

    -
    -

    -as_cmdstan_fit()

    -

    If you need to manually create fitted model objects from CmdStan CSV files use as_cmdstan_fit().

    +
    +

    as_cmdstan_fit() +

    +

    If you need to manually create fitted model objects from CmdStan CSV +files use as_cmdstan_fit().

    -fit2 <- as_cmdstan_fit(fit$output_files())
    -

    This is pointless in our case since we have the original fit object, but this function can be used to create fitted model objects (CmdStanMCMC, CmdStanMLE, etc.) from any CmdStan CSV files.

    +fit2 <- as_cmdstan_fit(fit$output_files())
    +

    This is pointless in our case since we have the original +fit object, but this function can be used to create fitted +model objects (CmdStanMCMC, CmdStanMLE, etc.) +from any CmdStan CSV files.

    -
    -

    -Saving and accessing advanced algorithm info (latent dynamics)

    -

    If save_latent_dynamics is set to TRUE when running the $sample() method then additional CSV files are created (one per chain) that provide access to quantities used under the hood by Stan’s implementation of dynamic Hamiltonian Monte Carlo.

    -

    CmdStanR does not yet provide a special method for processing these files but they can be read into R using R’s standard CSV reading functions.

    +
    +

    Saving and accessing advanced algorithm info (latent dynamics) +

    +

    If save_latent_dynamics is set to TRUE when +running the $sample() method then additional CSV files are +created (one per chain) that provide access to quantities used under the +hood by Stan’s implementation of dynamic Hamiltonian Monte Carlo.

    +

    CmdStanR does not yet provide a special method for processing these +files but they can be read into R using R’s standard CSV reading +functions.

    -fit <- mod$sample(data = data_list, save_latent_dynamics = TRUE)
    +fit <- mod$sample(data = data_list, save_latent_dynamics = TRUE)
    -fit$latent_dynamics_files()
    -
    [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-diagnostic-202203181228-1-263c4e.csv"
    -[2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-diagnostic-202203181228-2-263c4e.csv"
    -[3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-diagnostic-202203181228-3-263c4e.csv"
    -[4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-diagnostic-202203181228-4-263c4e.csv"
    +fit$latent_dynamics_files()
    +
    [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-diagnostic-202307251455-1-156ae3.csv"
    +[2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-diagnostic-202307251455-2-156ae3.csv"
    +[3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-diagnostic-202307251455-3-156ae3.csv"
    +[4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-diagnostic-202307251455-4-156ae3.csv"
    -# read one of the files in
    -x <- utils::read.csv(fit$latent_dynamics_files()[1], comment.char = "#")
    -head(x)
    +# read one of the files in +x <- utils::read.csv(fit$latent_dynamics_files()[1], comment.char = "#") +head(x)
          lp__ accept_stat__ stepsize__ treedepth__ n_leapfrog__ divergent__
    -1 -7.29215      0.999807   0.944718           2            3           0
    -2 -6.81105      0.988812   0.944718           1            3           0
    -3 -8.51410      0.694033   0.944718           1            3           0
    -4 -7.89903      1.000000   0.944718           1            1           0
    -5 -8.72776      0.925309   0.944718           1            1           0
    -6 -6.87320      1.000000   0.944718           2            3           0
    -  energy__    theta    p_theta   g_theta
    -1  7.38144 -1.84247 -0.6346060 -1.358890
    -2  7.27073 -1.34029  1.4398900 -0.510444
    -3  9.26904 -2.53206 -1.8452700 -2.116310
    -4  8.47008 -2.22096  1.6048800 -1.825390
    -5  8.72848 -2.63117  0.0571749 -2.194090
    -6  8.50747 -1.44232 -2.7149800 -0.705762
    -

    The column lp__ is also provided via fit$draws(), and the columns accept_stat__, stepsize__, treedepth__, n_leapfrog__, divergent__, and energy__ are also provided by fit$sampler_diagnostics(), but there are several columns unique to the latent dynamics file.

    +1 -6.77840 0.964447 0.816376 2 3 0 +2 -6.74817 0.998632 0.816376 1 3 0 +3 -6.74817 0.828194 0.816376 1 3 0 +4 -6.75537 0.956585 0.816376 2 3 0 +5 -7.01137 0.961708 0.816376 2 3 0 +6 -6.77984 0.967943 0.816376 1 3 0 + energy__ theta p_theta g_theta +1 7.09195 -1.265290 -1.074960 -0.3592230 +2 6.78971 -1.087340 -0.391278 0.0254425 +3 7.76283 -1.087340 -1.933750 0.0254425 +4 7.08469 -1.018340 -1.101660 0.1842200 +5 7.09451 -0.631842 -0.553509 1.1651200 +6 7.23052 -1.269250 -1.288770 -0.3673810 +

    The column lp__ is also provided via +fit$draws(), and the columns accept_stat__, +stepsize__, treedepth__, +n_leapfrog__, divergent__, and +energy__ are also provided by +fit$sampler_diagnostics(), but there are several columns +unique to the latent dynamics file.

    -head(x[, c("theta", "p_theta", "g_theta")])
    -
         theta    p_theta   g_theta
    -1 -1.84247 -0.6346060 -1.358890
    -2 -1.34029  1.4398900 -0.510444
    -3 -2.53206 -1.8452700 -2.116310
    -4 -2.22096  1.6048800 -1.825390
    -5 -2.63117  0.0571749 -2.194090
    -6 -1.44232 -2.7149800 -0.705762
    -

    Our model has a single parameter theta and the three columns above correspond to theta in the unconstrained space (theta on the constrained space is accessed via fit$draws()), the auxiliary momentum p_theta, and the gradient g_theta. In general, each of these three columns will exist for every parameter in the model.

    +head(x[, c("theta", "p_theta", "g_theta")]) +
          theta   p_theta    g_theta
    +1 -1.265290 -1.074960 -0.3592230
    +2 -1.087340 -0.391278  0.0254425
    +3 -1.087340 -1.933750  0.0254425
    +4 -1.018340 -1.101660  0.1842200
    +5 -0.631842 -0.553509  1.1651200
    +6 -1.269250 -1.288770 -0.3673810
    +

    Our model has a single parameter theta and the three +columns above correspond to theta in the +unconstrained space (theta on the constrained +space is accessed via fit$draws()), the auxiliary momentum +p_theta, and the gradient g_theta. In general, +each of these three columns will exist for every parameter in +the model.

    -
    -

    -Saving fitted model objects

    -

    As described above, the contents of the CSV files are only read into R when they are needed. This means that in order to save a fitted model object containing all of the posterior draws and sampler diagnostics you should either make sure to call fit$draws() and fit$sampler_diagnostics() before saving the object fit, or use the special $save_object() method provided by CmdStanR, which will ensure that everything has been read into R before saving the object using saveRDS().

    +
    +

    Saving fitted model objects +

    +

    As described above, the contents of the CSV files are only read into +R when they are needed. This means that in order to save a fitted model +object containing all of the posterior draws and sampler +diagnostics you should either make sure to call fit$draws() +and fit$sampler_diagnostics() before saving the object +fit, or use the special $save_object() method +provided by CmdStanR, which will ensure that everything has been read +into R before saving the object using saveRDS().

    -temp_rds_file <- tempfile(fileext = ".RDS") # temporary file just for demonstration
    -fit$save_object(file = temp_rds_file)
    -

    We can check that this worked by removing fit and loading it back in from the save file.

    +temp_rds_file <- tempfile(fileext = ".RDS") # temporary file just for demonstration +fit$save_object(file = temp_rds_file)
    +

    We can check that this worked by removing fit and +loading it back in from the save file.

    -rm(fit); gc()
    -
              used (Mb) gc trigger (Mb) limit (Mb) max used  (Mb)
    -Ncells 1159655 62.0    2208007  118         NA  2208007 118.0
    -Vcells 2197871 16.8    8388608   64      32768  3895933  29.8
    +rm(fit); gc()
    +
              used (Mb) gc trigger  (Mb) limit (Mb) max used  (Mb)
    +Ncells 1286546 68.8    2431888 129.9         NA  2431888 129.9
    +Vcells 2365428 18.1    8388608  64.0      32768  4130519  31.6
    -fit <- readRDS(temp_rds_file)
    -fit$summary()
    -
    # A tibble: 2 × 10
    +fit <- readRDS(temp_rds_file)
    +fit$summary()
    +
    
    # A tibble: 2 × 10
       variable   mean median    sd   mad      q5    q95  rhat ess_bulk ess_tail
    -  <chr>     <dbl>  <dbl> <dbl> <dbl>   <dbl>  <dbl> <dbl>    <dbl>    <dbl>
    -1 lp__     -7.25  -6.97  0.732 0.309 -8.71   -6.75   1.00    2009.    1714.
    -2 theta     0.250  0.235 0.118 0.118  0.0821  0.463  1.00    1445.    1640.
    +  <chr>     <num>  <num> <num> <num>   <num>  <num> <num>    <num>    <num>
    +1 lp__     -7.28  -7.00  0.751 0.339 -8.81   -6.75   1.00    1833.    2545.
    +2 theta     0.253  0.239 0.121 0.125  0.0805  0.470  1.00    1594.    1821.
    -
    -

    -Developing using CmdStanR

    -

    CmdStanR can of course be used for developing other packages that require compiling and running Stan models as well as using new or custom Stan features available through CmdStan.

    -
    -

    -Troubleshooting and debugging

    -

    When developing or testing new features it might be useful to have more information on how CmdStan is called internally and to see more information printed when compiling or running models. This can be enabled for an entire R session by setting the option "cmdstanr_verbose" to TRUE.

    +
    +

    Developing using CmdStanR +

    +

    CmdStanR can of course be used for developing other packages that +require compiling and running Stan models as well as using new or custom +Stan features available through CmdStan.

    +
    +

    Troubleshooting and debugging +

    +

    When developing or testing new features it might be useful to have +more information on how CmdStan is called internally and to see more +information printed when compiling or running models. This can be +enabled for an entire R session by setting the option +"cmdstanr_verbose" to TRUE.

    -options("cmdstanr_verbose"=TRUE)
    -
    -mod <- cmdstan_model(stan_file, force_recompile = TRUE)
    +options("cmdstanr_verbose"=TRUE) + +mod <- cmdstan_model(stan_file, force_recompile = TRUE)
    Running make \
    -  /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10 \
    +  /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565 \
       "STANCFLAGS +=  --name='bernoulli_model'"
     
     --- Translating Stan model to C++ code ---
    -bin/stanc --name='bernoulli_model' --o=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10.hpp /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10.stan
    +bin/stanc --name='bernoulli_model' --o=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565.hpp /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565.stan
     
     --- Compiling, linking C++ code ---
    -clang++ -std=c++1y -Wno-unknown-warning-option -Wno-tautological-compare -Wno-sign-compare -D_REENTRANT -Wno-ignored-attributes      -I stan/lib/stan_math/lib/tbb_2020.3/include    -O3 -I src -I stan/src -I lib/rapidjson_1.1.0/ -I lib/CLI11-1.9.1/ -I stan/lib/stan_math/ -I stan/lib/stan_math/lib/eigen_3.3.9 -I stan/lib/stan_math/lib/boost_1.75.0 -I stan/lib/stan_math/lib/sundials_6.0.0/include -I stan/lib/stan_math/lib/sundials_6.0.0/src/sundials    -DBOOST_DISABLE_ASSERTS          -c -include-pch stan/src/stan/model/model_header.hpp.gch -x c++ -o /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10.o /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10.hpp
    -clang++ -std=c++1y -Wno-unknown-warning-option -Wno-tautological-compare -Wno-sign-compare -D_REENTRANT -Wno-ignored-attributes      -I stan/lib/stan_math/lib/tbb_2020.3/include    -O3 -I src -I stan/src -I lib/rapidjson_1.1.0/ -I lib/CLI11-1.9.1/ -I stan/lib/stan_math/ -I stan/lib/stan_math/lib/eigen_3.3.9 -I stan/lib/stan_math/lib/boost_1.75.0 -I stan/lib/stan_math/lib/sundials_6.0.0/include -I stan/lib/stan_math/lib/sundials_6.0.0/src/sundials    -DBOOST_DISABLE_ASSERTS                -Wl,-L,"/Users/jgabry/.cmdstan/cmdstan-2.29.1/stan/lib/stan_math/lib/tbb" -Wl,-rpath,"/Users/jgabry/.cmdstan/cmdstan-2.29.1/stan/lib/stan_math/lib/tbb"      /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10.o src/cmdstan/main.o        -Wl,-L,"/Users/jgabry/.cmdstan/cmdstan-2.29.1/stan/lib/stan_math/lib/tbb" -Wl,-rpath,"/Users/jgabry/.cmdstan/cmdstan-2.29.1/stan/lib/stan_math/lib/tbb"   stan/lib/stan_math/lib/sundials_6.0.0/lib/libsundials_nvecserial.a stan/lib/stan_math/lib/sundials_6.0.0/lib/libsundials_cvodes.a stan/lib/stan_math/lib/sundials_6.0.0/lib/libsundials_idas.a stan/lib/stan_math/lib/sundials_6.0.0/lib/libsundials_kinsol.a  stan/lib/stan_math/lib/tbb/libtbb.dylib stan/lib/stan_math/lib/tbb/libtbbmalloc.dylib stan/lib/stan_math/lib/tbb/libtbbmalloc_proxy.dylib -o /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10
    -rm -f /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/model-dfb56119ce10.o
    +clang++ -std=c++1y -Wno-unknown-warning-option -Wno-tautological-compare -Wno-sign-compare -D_REENTRANT -Wno-ignored-attributes -I stan/lib/stan_math/lib/tbb_2020.3/include -O3 -I src -I stan/src -I stan/lib/rapidjson_1.1.0/ -I lib/CLI11-1.9.1/ -I stan/lib/stan_math/ -I stan/lib/stan_math/lib/eigen_3.4.0 -I stan/lib/stan_math/lib/boost_1.78.0 -I stan/lib/stan_math/lib/sundials_6.1.1/include -I stan/lib/stan_math/lib/sundials_6.1.1/src/sundials -DBOOST_DISABLE_ASSERTS -c -include-pch stan/src/stan/model/model_header.hpp.gch -x c++ -o /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565.o /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565.hpp +clang++ -std=c++1y -Wno-unknown-warning-option -Wno-tautological-compare -Wno-sign-compare -D_REENTRANT -Wno-ignored-attributes -I stan/lib/stan_math/lib/tbb_2020.3/include -O3 -I src -I stan/src -I stan/lib/rapidjson_1.1.0/ -I lib/CLI11-1.9.1/ -I stan/lib/stan_math/ -I stan/lib/stan_math/lib/eigen_3.4.0 -I stan/lib/stan_math/lib/boost_1.78.0 -I stan/lib/stan_math/lib/sundials_6.1.1/include -I stan/lib/stan_math/lib/sundials_6.1.1/src/sundials -DBOOST_DISABLE_ASSERTS -Wl,-L,"/Users/jgabry/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/tbb" -Wl,-rpath,"/Users/jgabry/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/tbb" /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565.o src/cmdstan/main.o -Wl,-L,"/Users/jgabry/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/tbb" -Wl,-rpath,"/Users/jgabry/.cmdstan/cmdstan-2.32.2/stan/lib/stan_math/lib/tbb" stan/lib/stan_math/lib/sundials_6.1.1/lib/libsundials_nvecserial.a stan/lib/stan_math/lib/sundials_6.1.1/lib/libsundials_cvodes.a stan/lib/stan_math/lib/sundials_6.1.1/lib/libsundials_idas.a stan/lib/stan_math/lib/sundials_6.1.1/lib/libsundials_kinsol.a stan/lib/stan_math/lib/tbb/libtbb.dylib stan/lib/stan_math/lib/tbb/libtbbmalloc.dylib stan/lib/stan_math/lib/tbb/libtbbmalloc_proxy.dylib -o 
/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565 +rm -f /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/model-17ac47a020565.o
    -fit <- mod$sample(
    -  data = data_list,
    -  chains = 1,
    -  iter_warmup = 100,
    -  iter_sampling = 100
    -)
    +fit <- mod$sample( + data = data_list, + chains = 1, + iter_warmup = 100, + iter_sampling = 100 +)
    Running MCMC with 1 chain...
     
    -Running ./bernoulli 'id=1' random 'seed=911816340' data \
    -  'file=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/standata-dfb528f42a87.json' \
    +Running ./bernoulli 'id=1' random 'seed=1523483778' data \
    +  'file=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/standata-17ac41edf7bd5.json' \
       output \
    -  'file=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-202203181228-1-036a9c.csv' \
    -  'profile_file=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-profile-202203181228-1-7b073f.csv' \
    +  'file=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-202307251455-1-7b6962.csv' \
    +  'profile_file=/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-profile-202307251455-1-281032.csv' \
       'method=sample' 'num_samples=100' 'num_warmup=100' 'save_warmup=0' \
       'algorithm=hmc' 'engine=nuts' adapt 'engaged=1'
     Chain 1 method = sample (Default) 
    @@ -790,16 +934,16 @@ 

    Chain 1 num_chains = 1 (Default) Chain 1 id = 1 (Default) Chain 1 data -Chain 1 file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/standata-dfb528f42a87.json +Chain 1 file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/standata-17ac41edf7bd5.json Chain 1 init = 2 (Default) Chain 1 random -Chain 1 seed = 911816340 +Chain 1 seed = 1523483778 Chain 1 output -Chain 1 file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-202203181228-1-036a9c.csv +Chain 1 file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-202307251455-1-7b6962.csv Chain 1 diagnostic_file = (Default) Chain 1 refresh = 100 (Default) Chain 1 sig_figs = -1 (Default) -Chain 1 profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMNhFrl/bernoulli-profile-202203181228-1-7b073f.csv +Chain 1 profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpOiRpoh/bernoulli-profile-202307251455-1-281032.csv Chain 1 num_threads = 1 (Default) Chain 1 Gradient evaluation took 6e-06 seconds Chain 1 1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds. @@ -834,11 +978,13 @@

    -

    Site built with pkgdown 1.6.1.

    +

    +

    Site built with pkgdown 2.0.7.

    @@ -847,5 +993,7 @@

    + + diff --git a/docs/articles/cmdstanr.html b/docs/articles/cmdstanr.html index 5c8946717..ac4bb8ffa 100644 --- a/docs/articles/cmdstanr.html +++ b/docs/articles/cmdstanr.html @@ -26,6 +26,8 @@ + +
    +
    -
    -

    -Introduction

    -

    CmdStanR is a lightweight interface to Stan for R users (see CmdStanPy for Python) that provides an alternative to the traditional RStan interface. See the Comparison with RStan section later in this vignette for more details on how the two interfaces differ.

    -

    CmdStanR is not on CRAN yet, but the beta release can be installed by running the following command in R.

    +
    +

    Introduction +

    +

    CmdStanR (Command Stan R) is a lightweight interface to Stan for R users that provides an +alternative to the traditional RStan interface. See the Comparison with RStan section +later in this vignette for more details on how the two interfaces +differ.

    +

    CmdStanR is not on CRAN yet, but the beta release +can be installed by running the following command in R.

    -# we recommend running this is a fresh R session or restarting your current session
    -install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
    -

    CmdStanR (the cmdstanr R package) can now be loaded like any other R package. We’ll also load the bayesplot and posterior packages to use later in examples.

    +# we recommend running this in a fresh R session or restarting your current session +install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))

    +

    CmdStanR (the cmdstanr R package) can now be loaded +like any other R package. We’ll also load the bayesplot +and posterior packages to use later in examples.

    -
    -
    -

    -Installing CmdStan

    -

    CmdStanR requires a working installation of CmdStan, the shell interface to Stan. If you don’t have CmdStan installed then CmdStanR can install it for you, assuming you have a suitable C++ toolchain. The requirements are described in the CmdStan Guide:

    +library(cmdstanr) +library(posterior) +library(bayesplot) +color_scheme_set("brightblue")
    + +
    +

    Installing CmdStan +

    +

    CmdStanR requires a working installation of CmdStan, +the shell interface to Stan. If you don’t have CmdStan installed then +CmdStanR can install it for you, assuming you have a suitable C++ +toolchain. The requirements are described in the CmdStan Guide:

    -

    To double check that your toolchain is set up properly you can call the check_cmdstan_toolchain() function:

    +

    To double check that your toolchain is set up properly you can call +the check_cmdstan_toolchain() function:

    +check_cmdstan_toolchain()
    The C++ toolchain required for CmdStan is setup properly!
    -

    If your toolchain is configured correctly then CmdStan can be installed by calling the install_cmdstan() function:

    +

    If your toolchain is configured correctly then CmdStan can be +installed by calling the install_cmdstan() +function:

    -install_cmdstan(cores = 2)
    -

    Before CmdStanR can be used it needs to know where the CmdStan installation is located. When the package is loaded it tries to help automate this to avoid having to manually set the path every session:

    +install_cmdstan(cores = 2) +

    Before CmdStanR can be used it needs to know where the CmdStan +installation is located. When the package is loaded it tries to help +automate this to avoid having to manually set the path every +session:

      -
    1. If the environment variable "CMDSTAN" exists at load time then its value will be automatically set as the default path to CmdStan for the R session. This is useful if your CmdStan installation is not located in the default directory that would have been used by install_cmdstan() (see #2).

    2. -
    3. If no environment variable is found when loaded but any directory in the form ".cmdstan/cmdstan-[version]", for example ".cmdstan/cmdstan-2.23.0", exists in the user’s home directory (Sys.getenv("HOME"), not the current working directory) then the path to the CmdStan with the largest version number will be set as the path to CmdStan for the R session. This is the same as the default directory that install_cmdstan() uses to install the latest version of CmdStan, so if that’s how you installed CmdStan you shouldn’t need to manually set the path to CmdStan when loading CmdStanR.

    4. +
    5. If the environment variable "CMDSTAN" exists at load +time then its value will be automatically set as the default path to +CmdStan for the R session. This is useful if your CmdStan installation +is not located in the default directory that would have been used by +install_cmdstan() (see #2).

    6. +
    7. If no environment variable is found when loaded but any directory +in the form ".cmdstan/cmdstan-[version]", for example +".cmdstan/cmdstan-2.23.0", exists in the user’s home +directory (Sys.getenv("HOME"), not the current +working directory) then the path to the CmdStan with the largest version +number will be set as the path to CmdStan for the R session. This is the +same as the default directory that install_cmdstan() uses +to install the latest version of CmdStan, so if that’s how you installed +CmdStan you shouldn’t need to manually set the path to CmdStan when +loading CmdStanR.

    -

    If neither of these applies (or you want to subsequently change the path) you can use the set_cmdstan_path() function:

    +

    If neither of these applies (or you want to subsequently change the +path) you can use the set_cmdstan_path() function:

    -set_cmdstan_path(PATH_TO_CMDSTAN)
    -

    To check the path to the CmdStan installation and the CmdStan version number you can use cmdstan_path() and cmdstan_version():

    +set_cmdstan_path(PATH_TO_CMDSTAN) +

    To check the path to the CmdStan installation and the CmdStan version +number you can use cmdstan_path() and +cmdstan_version():

    -cmdstan_path()
    -
    [1] "/Users/jgabry/.cmdstan/cmdstan-2.29.1"
    +cmdstan_path() +
    [1] "/Users/jgabry/.cmdstan/cmdstan-2.32.2"
    -cmdstan_version()
    -
    [1] "2.29.1"
    - -
    -

    -Compiling a model

    -

    The cmdstan_model() function creates a new CmdStanModel object from a file containing a Stan program. Under the hood, CmdStan is called to translate a Stan program to C++ and create a compiled executable. Here we’ll use the example Stan program that comes with the CmdStan installation:

    +cmdstan_version()
    +
    [1] "2.32.2"
    + +
    +

    Compiling a model +

    +

    The cmdstan_model() function creates a new CmdStanModel +object from a file containing a Stan program. Under the hood, CmdStan is +called to translate a Stan program to C++ and create a compiled +executable. Here we’ll use the example Stan program that comes with the +CmdStan installation:

    -file <- file.path(cmdstan_path(), "examples", "bernoulli", "bernoulli.stan")
    -mod <- cmdstan_model(file)
    -

    The object mod is an R6 reference object of class CmdStanModel and behaves similarly to R’s reference class objects and those in object oriented programming languages. Methods are accessed using the $ operator. This design choice allows for CmdStanR and CmdStanPy to provide a similar user experience and share many implementation details.

    -

    The Stan program can be printed using the $print() method:

    +file <- file.path(cmdstan_path(), "examples", "bernoulli", "bernoulli.stan") +mod <- cmdstan_model(file)
    +

    The object mod is an R6 reference object of class CmdStanModel +and behaves similarly to R’s reference class objects and those in object +oriented programming languages. Methods are accessed using the +$ operator. This design choice allows for CmdStanR and CmdStanPy to provide a +similar user experience and share many implementation details.

    +

    The Stan program can be printed using the $print() +method:

    -mod$print()
    +mod$print()
    data {
       int<lower=0> N;
    -  array[N] int<lower=0,upper=1> y; // or int<lower=0,upper=1> y[N];
    +  array[N] int<lower=0,upper=1> y;
     }
     parameters {
       real<lower=0,upper=1> theta;
    @@ -207,26 +249,31 @@ 

    theta ~ beta(1,1); // uniform prior on interval 0,1 y ~ bernoulli(theta); }

    -

    The path to the compiled executable is returned by the $exe_file() method:

    +

    The path to the compiled executable is returned by the +$exe_file() method:

    -mod$exe_file()
    -
    [1] "/Users/jgabry/.cmdstan/cmdstan-2.29.1/examples/bernoulli/bernoulli"
    - -
    -

    -Running MCMC

    -

    The $sample() method for CmdStanModel objects runs Stan’s default MCMC algorithm. The data argument accepts a named list of R objects (like for RStan) or a path to a data file compatible with CmdStan (JSON or R dump).

    +mod$exe_file()
    +
    [1] "/Users/jgabry/.cmdstan/cmdstan-2.32.2/examples/bernoulli/bernoulli"
    + +
    +

    Running MCMC +

    +

    The $sample() +method for CmdStanModel +objects runs Stan’s default MCMC algorithm. The data +argument accepts a named list of R objects (like for RStan) or a path to +a data file compatible with CmdStan (JSON or R dump).

    -# names correspond to the data block in the Stan program
    -data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1))
    -
    -fit <- mod$sample(
    -  data = data_list, 
    -  seed = 123, 
    -  chains = 4, 
    -  parallel_chains = 4,
    -  refresh = 500 # print update every 500 iters
    -)
    +# names correspond to the data block in the Stan program +data_list <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) + +fit <- mod$sample( + data = data_list, + seed = 123, + chains = 4, + parallel_chains = 4, + refresh = 500 # print update every 500 iters +)
    Running MCMC with 4 parallel chains...
     
     Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
    @@ -260,79 +307,99 @@ 

    All 4 chains finished successfully. Mean chain execution time: 0.0 seconds. -Total execution time: 0.3 seconds.

    -

    There are many more arguments that can be passed to the $sample() method. For details follow this link to its separate documentation page:

    +Total execution time: 0.4 seconds.
    +

    There are many more arguments that can be passed to the +$sample() method. For details follow this link to its +separate documentation page:

    -

    The $sample() method creates R6 CmdStanMCMC objects, which have many associated methods. Below we will demonstrate some of the most important methods. For a full list, follow this link to the CmdStanMCMC documentation:

    +

    The $sample() method creates R6 CmdStanMCMC objects, +which have many associated methods. Below we will demonstrate some of +the most important methods. For a full list, follow this link to the +CmdStanMCMC documentation:

    -
    -

    -Posterior summary statistics

    -
    -

    -Summaries from the posterior package

    -

    The $summary() method calls summarise_draws() from the posterior package. The first argument specifies the variables to summarize and any arguments after that are passed on to posterior::summarise_draws() to specify which summaries to compute, whether to use multiple cores, etc.

    +
    +

    Posterior summary statistics +

    +
    +

    Summaries from the posterior package +

    +

    The $summary() +method calls summarise_draws() from the +posterior package. The first argument specifies the +variables to summarize and any arguments after that are passed on to +posterior::summarise_draws() to specify which summaries to +compute, whether to use multiple cores, etc.

    -fit$summary()
    -
    # A tibble: 2 × 10
    -  variable   mean median    sd   mad      q5    q95  rhat ess_bulk ess_tail
    -  <chr>     <dbl>  <dbl> <dbl> <dbl>   <dbl>  <dbl> <dbl>    <dbl>    <dbl>
    -1 lp__     -7.27  -7.00  0.709 0.344 -8.70   -6.75   1.00    1852.    2114.
    -2 theta     0.247  0.232 0.119 0.123  0.0804  0.466  1.00    1611.    1678.
    -
    -fit$summary(variables = c("theta", "lp__"), "mean", "sd")
    -
    # A tibble: 2 × 3
    -  variable   mean    sd
    -  <chr>     <dbl> <dbl>
    -1 theta     0.247 0.119
    -2 lp__     -7.27  0.709
    -
    -# use a formula to summarize arbitrary functions, e.g. Pr(theta <= 0.5)
    -fit$summary("theta", pr_lt_half = ~ mean(. <= 0.5))
    -
    # A tibble: 1 × 2
    -  variable pr_lt_half
    -  <chr>         <dbl>
    -1 theta         0.969
    -
    -
    -

    -CmdStan’s stansummary utility

    -

    CmdStan itself provides a stansummary utility that can be called using the $cmdstan_summary() method. This method will print summaries but won’t return anything.

    -
    -
    -
    -

    -Posterior draws

    -
    -

    -Extracting draws

    -

    The $draws() method can be used to extract the posterior draws in formats provided by the posterior package. Here we demonstrate only the draws_array and draws_df formats, but the posterior package supports other useful formats as well.

    -
    -# default is a 3-D draws_array object from the posterior package
    -# iterations x chains x variables
    -draws_arr <- fit$draws() # or format="array"
    -str(draws_arr)
    +fit$summary() +fit$summary(variables = c("theta", "lp__"), "mean", "sd") + +# use a formula to summarize arbitrary functions, e.g. Pr(theta <= 0.5) +fit$summary("theta", pr_lt_half = ~ mean(. <= 0.5)) + +# summarise all variables with default and additional summary measures +fit$summary( + variables = NULL, + posterior::default_summary_measures(), + extra_quantiles = ~posterior::quantile2(., probs = c(.0275, .975)) +)
    +
      variable  mean median   sd  mad    q5   q95 rhat ess_bulk ess_tail
    +1     lp__ -7.27  -7.00 0.71 0.34 -8.70 -6.75    1     1852     2114
    +2    theta  0.25   0.23 0.12 0.12  0.08  0.47    1     1611     1678
    +
      variable  mean   sd
    +1    theta  0.25 0.12
    +2     lp__ -7.27 0.71
    +
      variable pr_lt_half
    +1    theta       0.97
    +
      variable  mean median   sd  mad    q5   q95  q2.75 q97.5
    +1     lp__ -7.27  -7.00 0.71 0.34 -8.70 -6.75 -9.165 -6.75
    +2    theta  0.25   0.23 0.12 0.12  0.08  0.47  0.065  0.52
    +
    +
    +

    CmdStan’s stansummary utility +

    +

    CmdStan itself provides a stansummary utility that can +be called using the $cmdstan_summary() method. This method +will print summaries but won’t return anything.

    +
    +
    +
    +

    Posterior draws +

    +
    +

    Extracting draws +

    +

    The $draws() +method can be used to extract the posterior draws in formats provided by +the posterior +package. Here we demonstrate only the draws_array and +draws_df formats, but the posterior +package supports other useful formats as well.

    +
    +# default is a 3-D draws_array object from the posterior package
    +# iterations x chains x variables
    +draws_arr <- fit$draws() # or format="array"
    +str(draws_arr)
     'draws_array' num [1:1000, 1:4, 1:2] -6.78 -6.9 -7.05 -6.85 -6.75 ...
      - attr(*, "dimnames")=List of 3
       ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
       ..$ chain    : chr [1:4] "1" "2" "3" "4"
       ..$ variable : chr [1:2] "lp__" "theta"
    -
    -# draws x variables data frame
    -draws_df <- fit$draws(format = "df")
    -str(draws_df)
    +
    +# draws x variables data frame
    +draws_df <- fit$draws(format = "df")
    +str(draws_df)
    draws_df [4,000 × 5] (S3: draws_df/draws/tbl_df/tbl/data.frame)
      $ lp__      : num [1:4000] -6.78 -6.9 -7.05 -6.85 -6.75 ...
      $ theta     : num [1:4000] 0.284 0.186 0.162 0.196 0.252 ...
      $ .chain    : int [1:4000] 1 1 1 1 1 1 1 1 1 1 ...
      $ .iteration: int [1:4000] 1 2 3 4 5 6 7 8 9 10 ...
      $ .draw     : int [1:4000] 1 2 3 4 5 6 7 8 9 10 ...
    -
    -print(draws_df)
    +
    +print(draws_df)
    # A draws_df: 1000 iterations, 4 chains, and 2 variables
        lp__ theta
     1  -6.8  0.28
    @@ -347,41 +414,52 @@ 

    10 -7.5 0.42 # ... with 3990 more draws # ... hidden reserved variables {'.chain', '.iteration', '.draw'}

    -

    To convert an existing draws object to a different format use the posterior::as_draws_*() functions.

    -
    -# this should be identical to draws_df created via draws(format = "df")
    -draws_df_2 <- as_draws_df(draws_arr)
    -identical(draws_df, draws_df_2)
    +

    To convert an existing draws object to a different format use the +posterior::as_draws_*() functions.

    +
    +# this should be identical to draws_df created via draws(format = "df")
    +draws_df_2 <- as_draws_df(draws_arr)
    +identical(draws_df, draws_df_2)
    [1] TRUE
    -

    In general, converting to a different draws format in this way will be slower than just setting the appropriate format initially in the call to the $draws() method, but in most cases the speed difference will be minor.

    -
    -
    -

    -Plotting draws

    -

    Plotting posterior distributions is as easy as passing the object returned by the $draws() method directly to plotting functions in our bayesplot package.

    -
    -mcmc_hist(fit$draws("theta"))
    +

    In general, converting to a different draws format in this way will +be slower than just setting the appropriate format initially in the call +to the $draws() method, but in most cases the speed +difference will be minor.

    +
    +
    +

    Plotting draws +

    +

    Plotting posterior distributions is as easy as passing the object +returned by the $draws() method directly to plotting +functions in our bayesplot +package.

    +
    +mcmc_hist(fit$draws("theta"))

    -
    -

    -Sampler diagnostics

    -
    -

    -Extracting diagnostic values for each iteration and chain

    -

    The $sampler_diagnostics() method extracts the values of the sampler parameters (treedepth__, divergent__, etc.) in formats supported by the posterior package. The default is as a 3-D array (iteration x chain x variable).

    -
    -# this is a draws_array object from the posterior package
    -str(fit$sampler_diagnostics())
    +
    +

    Sampler diagnostics +

    +
    +

    Extracting diagnostic values for each iteration and chain +

    +

    The $sampler_diagnostics() +method extracts the values of the sampler parameters +(treedepth__, divergent__, etc.) in formats +supported by the posterior package. The default is as a +3-D array (iteration x chain x variable).

    +
    +# this is a draws_array object from the posterior package
    +str(fit$sampler_diagnostics())
     'draws_array' num [1:1000, 1:4, 1:6] 1 2 2 2 2 1 1 1 1 2 ...
      - attr(*, "dimnames")=List of 3
       ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
       ..$ chain    : chr [1:4] "1" "2" "3" "4"
       ..$ variable : chr [1:6] "treedepth__" "divergent__" "energy__" "accept_stat__" ...
    -
    -# this is a draws_df object from the posterior package
    -str(fit$sampler_diagnostics(format = "df"))
    +
    +# this is a draws_df object from the posterior package
    +str(fit$sampler_diagnostics(format = "df"))
    draws_df [4,000 × 9] (S3: draws_df/draws/tbl_df/tbl/data.frame)
      $ treedepth__  : num [1:4000] 1 2 2 2 2 1 1 1 1 2 ...
      $ divergent__  : num [1:4000] 0 0 0 0 0 0 0 0 0 0 ...
    @@ -393,12 +471,14 @@ 

    $ .iteration : int [1:4000] 1 2 3 4 5 6 7 8 9 10 ... $ .draw : int [1:4000] 1 2 3 4 5 6 7 8 9 10 ...

    -
    -

    -Sampler diagnostic warnings and summaries

    -

    The $diagnostic_summary() method will display any sampler diagnostic warnings and return a summary of diagnostics for each chain.

    -
    -fit$diagnostic_summary()
    +
    +

    Sampler diagnostic warnings and summaries +

    +

    The $diagnostic_summary() method will display any +sampler diagnostic warnings and return a summary of diagnostics for each +chain.

    +
    +fit$diagnostic_summary()
    $num_divergent
     [1] 0 0 0 0
     
    @@ -406,93 +486,113 @@ 

    [1] 0 0 0 0 $ebfmi -[1] 1.017555 1.250490 1.078559 1.237357

    -

    We see the number of divergences for each of the four chains, the number of times the maximum treedepth was hit for each chain, and the E-BFMI for each chain.

    -

    In this case there were no warnings, so in order to demonstrate the warning messages we’ll use one of the CmdStanR example models that suffers from divergences.

    -
    -fit_with_warning <- cmdstanr_example("schools")
    -
    Warning: 76 of 4000 (2.0%) transitions ended with a divergence.
    +[1] 1.0 1.3 1.1 1.2
    +

    We see the number of divergences for each of the four chains, the +number of times the maximum treedepth was hit for each chain, and the +E-BFMI for each chain.

    +

    In this case there were no warnings, so in order to demonstrate the +warning messages we’ll use one of the CmdStanR example models that +suffers from divergences.

    +
    +fit_with_warning <- cmdstanr_example("schools")
    +
    Warning: 185 of 4000 (5.0%) transitions ended with a divergence.
     See https://mc-stan.org/misc/warnings for details.
    -

    After fitting there is a warning about divergences. We can also regenerate this warning message later using fit$diagnostic_summary().

    -
    -diagnostics <- fit_with_warning$diagnostic_summary()
    -
    Warning: 76 of 4000 (2.0%) transitions ended with a divergence.
    +

    After fitting there is a warning about divergences. We can also +regenerate this warning message later using +fit$diagnostic_summary().

    +
    +diagnostics <- fit_with_warning$diagnostic_summary()
    +
    Warning: 185 of 4000 (5.0%) transitions ended with a divergence.
     See https://mc-stan.org/misc/warnings for details.
    -
    -print(diagnostics)
    +
    +print(diagnostics)
    $num_divergent
    -[1] 22 19 33  2
    +[1]   8 120  17  40
     
     $num_max_treedepth
     [1] 0 0 0 0
     
     $ebfmi
    -[1] 0.3826859 0.3953390 0.2349030 0.2268031
    -
    -# number of divergences reported in warning is the sum of the per chain values
    -sum(diagnostics$num_divergent) 
    -
    [1] 76
    -
    -
    -

    -CmdStan’s diagnose utility

    -

    CmdStan itself provides a diagnose utility that can be called using the $cmdstan_diagnose() method. This method will print warnings but won’t return anything.

    -
    -
    -
    -

    -Create a stanfit object

    -

    If you have RStan installed then it is also possible to create a stanfit object from the csv output files written by CmdStan. This can be done by using rstan::read_stan_csv() in combination with the $output_files() method of the CmdStanMCMC object. This is only needed if you want to fit a model with CmdStanR but already have a lot of post-processing code that assumes a stanfit object. Otherwise we recommend using the post-processing functionality provided by CmdStanR itself.

    +[1] 0.29 0.24 0.26 0.30
    +
    +# number of divergences reported in warning is the sum of the per chain values
    +sum(diagnostics$num_divergent)
    +
    [1] 185
    +
    +
    +

    CmdStan’s diagnose utility +

    +

    CmdStan itself provides a diagnose utility that can be +called using the $cmdstan_diagnose() method. This method +will print warnings but won’t return anything.

    +
    +
    +
    +

    Create a stanfit object +

    +

    If you have RStan installed then it is also possible to create a +stanfit object from the csv output files written by +CmdStan. This can be done by using rstan::read_stan_csv() +in combination with the $output_files() method of the +CmdStanMCMC object. This is only needed if you want to fit +a model with CmdStanR but already have a lot of post-processing code +that assumes a stanfit object. Otherwise we recommend using +the post-processing functionality provided by CmdStanR itself.

    +
    +stanfit <- rstan::read_stan_csv(fit$output_files())
    +
    +
    +
    +

    Running optimization and variational inference +

    +

    CmdStanR also supports running Stan’s optimization algorithms and its +algorithms for variational approximation of full Bayesian inference. +These are run via the $optimize() and +$variational() methods, which are called in a similar way +to the $sample() method demonstrated above.

    +
    +

    Optimization +

    +

    We can find the (penalized) maximum likelihood estimate (MLE) using +$optimize().

    -stanfit <- rstan::read_stan_csv(fit$output_files())
    -
    -
    -
    -

    -Running optimization and variational inference

    -

    CmdStanR also supports running Stan’s optimization algorithms and its algorithms for variational approximation of full Bayesian inference. These are run via the $optimize() and $variational() methods, which are called in a similar way to the $sample() method demonstrated above.

    -
    -

    -Optimization

    -

    We can find the (penalized) maximum likelihood estimate (MLE) using $optimize().

    -
    -fit_mle <- mod$optimize(data = data_list, seed = 123) 
    +fit_mle <- mod$optimize(data = data_list, seed = 123) +fit_mle$summary() # includes lp__ (log prob calculated by Stan program) +fit_mle$mle("theta")
    Initial log joint probability = -9.51104 
         Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes  
            6      -5.00402   0.000103557   2.55661e-07           1           1        9    
     Optimization terminated normally:  
       Convergence detected: relative gradient magnitude is below tolerance 
    -Finished in  0.1 seconds.
    -
    -fit_mle$summary() # includes lp__ (log prob calculated by Stan program)
    -
    # A tibble: 2 × 2
    -  variable estimate
    -  <chr>       <dbl>
    -1 lp__        -5.00
    -2 theta        0.2 
    -
    -fit_mle$mle("theta") 
    -
    theta 
    -  0.2 
    -

    Here’s a plot comparing the penalized MLE to the posterior distribution of theta.

    -
    -mcmc_hist(fit$draws("theta")) + 
    -  vline_at(fit_mle$mle("theta"), size = 1.5)
    +Finished in 0.2 seconds. +
      variable estimate
    +1     lp__     -5.0
    +2    theta      0.2
    +
    theta 
    +  0.2 
    +

    Here’s a plot comparing the penalized MLE to the posterior +distribution of theta.

    +
    +mcmc_hist(fit$draws("theta")) +
    +  vline_at(fit_mle$mle("theta"), size = 1.5)

    -
    -

    -Variational Bayes

    -

    We can run Stan’s experimental variational Bayes algorithm (ADVI) using the $variational() method.

    -
    -fit_vb <- mod$variational(data = data_list, seed = 123, output_samples = 4000) 
    +
    +

    Variational Bayes +

    +

    We can run Stan’s experimental variational Bayes algorithm (ADVI) +using the $variational() +method.

    +
    +fit_vb <- mod$variational(data = data_list, seed = 123, output_samples = 4000)
    +fit_vb$summary("theta")
    ------------------------------------------------------------ 
     EXPERIMENTAL ALGORITHM: 
       This procedure has not been thoroughly tested and may be unstable 
       or buggy. The interface is subject to change. 
     ------------------------------------------------------------ 
    -Gradient evaluation took 4e-06 seconds 
    -1000 transitions using 10 leapfrog steps per transition would take 0.04 seconds. 
    +Gradient evaluation took 6e-06 seconds 
    +1000 transitions using 10 leapfrog steps per transition would take 0.06 seconds. 
     Adjust your expectations accordingly! 
     Begin eta adaptation. 
     Iteration:   1 / 250 [  0%]  (Adaptation) 
    @@ -509,92 +609,115 @@ 

    Drawing a sample of size 4000 from the approximate posterior... COMPLETED. Finished in 0.1 seconds.

    -
    -fit_vb$summary("theta")
    -
    # A tibble: 1 × 7
    -  variable  mean median    sd   mad    q5   q95
    -  <chr>    <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
    -1 theta    0.267  0.250 0.117 0.117 0.105 0.487
    -

    The $draws() method can be used to access the approximate posterior draws. Let’s extract the draws, make the same plot we made after MCMC, and compare the two. In this trivial example the distributions look quite similar, although the variational approximation slightly underestimates the posterior standard deviation.

    -
    -mcmc_hist(fit$draws("theta"), binwidth = 0.025)
    +
      variable mean median   sd  mad  q5  q95
    +1    theta 0.27   0.25 0.12 0.12 0.1 0.49
    +

    The $draws() method can be used to access the +approximate posterior draws. Let’s extract the draws, make the same plot +we made after MCMC, and compare the two. In this trivial example the +distributions look quite similar, although the variational approximation +slightly underestimates the posterior standard deviation.

    +
    +mcmc_hist(fit$draws("theta"), binwidth = 0.025)
    Posterior from MCMC

    Posterior from MCMC

    -
    -mcmc_hist(fit_vb$draws("theta"), binwidth = 0.025)
    +
    +mcmc_hist(fit_vb$draws("theta"), binwidth = 0.025)
    Posterior from variational

    Posterior from variational

    -

    For more details on the $optimize() and $variational() methods, follow these links to their documentation pages.

    +

    For more details on the $optimize() and +$variational() methods, follow these links to their +documentation pages.

    -
    -

    -Saving fitted model objects

    -

    In order to save a fitted model object to disk and ensure that all of the contents are available when reading the object back into R, we recommend using the $save_object() method provided by CmdStanR. The reason for this is discussed in detail in the vignette How does CmdStanR work?, so here we just demonstrate how to use the method.

    -
    -fit$save_object(file = "fit.RDS")
    -
    -# can be read back in using readRDS
    -fit2 <- readRDS("fit.RDS")
    -
    -
    -

    -Comparison with RStan

    -
    -

    -Different ways of interfacing with Stan’s C++

    -

    The RStan interface (rstan package) is an in-memory interface to Stan and relies on R packages like Rcpp and inline to call C++ code from R. On the other hand, the CmdStanR interface does not directly call any C++ code from R, instead relying on the CmdStan interface behind the scenes for compilation, running algorithms, and writing results to output files.

    -
    -
    -

    -Advantages of RStan

    -
      -
    • Advanced features. We are working on making these available outside of RStan but currently they are only available to R users via RStan: +
      +

      Saving fitted model objects +

      +

      In order to save a fitted model object to disk and ensure that all of +the contents are available when reading the object back into R, we +recommend using the $save_object() +method provided by CmdStanR. The reason for this is discussed in detail +in the vignette How +does CmdStanR work?, so here we just demonstrate how to use the +method.

      +
      +fit$save_object(file = "fit.RDS")
      +
      +# can be read back in using readRDS
      +fit2 <- readRDS("fit.RDS")
      +
      +
      +

      Comparison with RStan +

      +
      +

      Different ways of interfacing with Stan’s C++ +

      +

      The RStan interface (rstan package) is +an in-memory interface to Stan and relies on R packages like +Rcpp and inline to call C++ code from +R. On the other hand, the CmdStanR interface does not directly call any +C++ code from R, instead relying on the CmdStan interface behind the +scenes for compilation, running algorithms, and writing results to +output files.

      +
      +
    • -
    • Allows other developers to distribute R packages with pre-compiled Stan programs (like rstanarm) on CRAN.
    • +
    • Allows other developers to distribute R packages with +pre-compiled Stan programs (like rstanarm) on +CRAN.

    • +
    • Avoids use of R6 classes, which may result in more familiar +syntax for many R users.

    -
    -

    -Advantages of CmdStanR

    +
    +

    Advantages of CmdStanR +

      -
    • Compatible with latest versions of Stan. Keeping up with Stan releases is complicated for RStan, often requiring non-trivial changes to the rstan package and new CRAN releases of both rstan and StanHeaders. With CmdStanR the latest improvements in Stan will be available from R immediately after updating CmdStan using cmdstanr::install_cmdstan().

    • -
    • Fewer installation issues (e.g., no need to mess with Makevars files).

    • -
    • Running Stan via external processes results in fewer unexpected crashes, especially in RStudio.

    • +
    • Compatible with latest versions of Stan. Keeping up with Stan +releases is complicated for RStan, often requiring non-trivial changes +to the rstan package and new CRAN releases of both +rstan and StanHeaders. With CmdStanR +the latest improvements in Stan will be available from R immediately +after updating CmdStan using +cmdstanr::install_cmdstan().

    • +
    • Running Stan via external processes results in fewer unexpected +crashes, especially in RStudio.

    • Less memory overhead.

    • -
    • More permissive license. RStan uses the GPL-3 license while the license for CmdStanR is BSD-3, which is a bit more permissive and is the same license used for CmdStan and the Stan C++ source code.

    • +
    • More permissive license. RStan uses the GPL-3 license while the +license for CmdStanR is BSD-3, which is a bit more permissive and is the +same license used for CmdStan and the Stan C++ source code.

    -
    -

    -Additional resources

    -

    There are additional vignettes available that discuss other aspects of using CmdStanR. These can be found online at the CmdStanR website:

    +
    +

    Additional resources +

    +

    There are additional vignettes available that discuss other aspects +of using CmdStanR. These can be found online at the CmdStanR +website:

    To ask a question please post on the Stan forums:

    -

    To report a bug, suggest a feature (including additions to these vignettes), or to start contributing to CmdStanR development (new contributors welcome!) please open an issue on GitHub:

    +

    To report a bug, suggest a feature (including additions to these +vignettes), or to start contributing to CmdStanR development (new +contributors welcome!) please open an issue on GitHub:

    @@ -610,11 +733,13 @@

    -

    Site built with pkgdown 1.6.1.

    +

    +

    Site built with pkgdown 2.0.7.

    @@ -623,5 +748,7 @@

    + + diff --git a/docs/articles/cmdstanr_files/figure-html/plot-mle-1.png b/docs/articles/cmdstanr_files/figure-html/plot-mle-1.png index 1998cf9a6..ea9c07442 100644 Binary files a/docs/articles/cmdstanr_files/figure-html/plot-mle-1.png and b/docs/articles/cmdstanr_files/figure-html/plot-mle-1.png differ diff --git a/docs/articles/cmdstanr_files/figure-html/plot-variational-1-1.png b/docs/articles/cmdstanr_files/figure-html/plot-variational-1-1.png index de5aca39a..360d4aeb7 100644 Binary files a/docs/articles/cmdstanr_files/figure-html/plot-variational-1-1.png and b/docs/articles/cmdstanr_files/figure-html/plot-variational-1-1.png differ diff --git a/docs/articles/cmdstanr_files/figure-html/plot-variational-2-1.png b/docs/articles/cmdstanr_files/figure-html/plot-variational-2-1.png index 7d8aa6241..2daa2b15a 100644 Binary files a/docs/articles/cmdstanr_files/figure-html/plot-variational-2-1.png and b/docs/articles/cmdstanr_files/figure-html/plot-variational-2-1.png differ diff --git a/docs/articles/cmdstanr_files/figure-html/plots-1.png b/docs/articles/cmdstanr_files/figure-html/plots-1.png index 4d8475860..1ffef3765 100644 Binary files a/docs/articles/cmdstanr_files/figure-html/plots-1.png and b/docs/articles/cmdstanr_files/figure-html/plots-1.png differ diff --git a/docs/articles/deprecations.html b/docs/articles/deprecations.html index e889922a4..a579688f7 100644 --- a/docs/articles/deprecations.html +++ b/docs/articles/deprecations.html @@ -26,6 +26,8 @@ + +
    +
    -
    -

    -Introduction

    -

    This vignette demonstrates how to handle cases where your Stan program contains deprecated features resulting in deprecation warnings. In most cases, the Stan-to-C++ compiler can be used to automatically update your code to a non-deprecated feature that replaces the deprecated one. This vignette showcases how that automatic conversion can be done using CmdStanR.

    -

    The automatic conversion of deprecated features to non-deprecated features is done using the so-called “canonicalizer”, which is part of the Stan-to-C++ compiler. We recommend using CmdStan 2.29.2 or later when using the canonicalizer and this vignette. The minimum CmdStanR version to run the code in the vignette is 0.5.0.

    +
    +

    Introduction +

    +

    This vignette demonstrates how to handle cases where your Stan +program contains deprecated features resulting in deprecation warnings. +In most cases, the Stan-to-C++ compiler can be used to automatically +update your code to a non-deprecated feature that replaces the +deprecated one. This vignette showcases how that automatic conversion +can be done using CmdStanR.

    +

    The automatic conversion of deprecated features to non-deprecated +features is done using the so-called “canonicalizer”, which is part of +the Stan-to-C++ compiler. We recommend using CmdStan 2.29.2 or later +when using the canonicalizer and this vignette. The minimum CmdStanR +version to run the code in the vignette is 0.5.0.

    -library(cmdstanr)
    -check_cmdstan_toolchain(fix = TRUE, quiet = TRUE)
    +library(cmdstanr) +check_cmdstan_toolchain(fix = TRUE, quiet = TRUE)
    -
    -

    -Deprecation warnings

    -

    The following logistic regression model uses several deprecated language features, resulting in several warnings during compilation.

    +
    +

    Deprecation warnings +

    +

    The following logistic regression model uses several deprecated +language features, resulting in several warnings during compilation.

    -stan_file <- write_stan_file("
    -data {
    -  int<lower=1> k;
    -  int<lower=0> n;
    -  matrix[n, k] X;
    -  int y[n];
    -}
    -parameters {
    -  vector[k] beta;
    -  real alpha;
    -}
    -model {
    -  # priors
    -  target += std_normal_log(beta);
    -  alpha ~ std_normal();
    -  
    -  y ~ bernoulli_logit(X * beta + alpha);
    -}
    -")
    -mod <- cmdstan_model(stan_file)
    -
    Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmplsuXnS/model-840d5c7122c.stan', line 6, column 2: Declaration
    +stan_file <- write_stan_file("
    +data {
    +  int<lower=1> k;
    +  int<lower=0> n;
    +  matrix[n, k] X;
    +  int y[n];
    +}
    +parameters {
    +  vector[k] beta;
    +  real alpha;
    +}
    +model {
    +  # priors
    +  target += std_normal_log(beta);
    +  alpha ~ std_normal();
    +
    +  y ~ bernoulli_logit(X * beta + alpha);
    +}
    +")
    +mod <- cmdstan_model(stan_file)
    +
    Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMBUSHs/model-17e6a34f96f68.stan', line 6, column 2: Declaration
         of arrays by placing brackets after a variable name is deprecated and
    -    will be removed in Stan 2.32.0. Instead use the array keyword before the
    +    will be removed in Stan 2.33.0. Instead use the array keyword before the
         type. This can be changed automatically using the auto-format flag to
         stanc
    -Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmplsuXnS/model-840d5c7122c.stan', line 13, column 2: Comments
    +Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMBUSHs/model-17e6a34f96f68.stan', line 13, column 2: Comments
         beginning with # are deprecated and this syntax will be removed in Stan
    -    2.32.0. Use // to begin line comments; this can be done automatically
    +    2.33.0. Use // to begin line comments; this can be done automatically
         using the auto-format flag to stanc
    -Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmplsuXnS/model-840d5c7122c.stan', line 14, column 12: std_normal_log
    -    is deprecated and will be removed in Stan 2.32.0. Use std_normal_lpdf
    +Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpMBUSHs/model-17e6a34f96f68.stan', line 14, column 12: std_normal_log
    +    is deprecated and will be removed in Stan 2.33.0. Use std_normal_lpdf
         instead. This can be automatically changed using the canonicalize flag
         for stanc

    The first warning is about using the deprecated array syntax

    int y[n];
    -

    which should be replaced with the new syntax using the array keyword:

    +

    which should be replaced with the new syntax using the +array keyword:

    array[n] int y;
    -

    The second warning is about using the deprecated commenting symbol #, which should be replaced by //.

    -

    The last warning is about the use of the deprecated _log suffix for probability density and mass functions. In this case the _log suffix should be replaced with _lpdf. For probability mass functions the suffix _lpmf is used.

    -

    We can go and fix these issues manually or use the canonicalizer as outlined in the next section.

    +

    The second warning is about using the deprecated commenting symbol +#, which should be replaced by //.

    +

    The last warning is about the use of the deprecated _log +suffix for probability density and mass functions. In this case the +_log suffix should be replaced with _lpdf. For +probability mass functions the suffix _lpmf is used.

    +

    We can go and fix these issues manually or use the canonicalizer as +outlined in the next section.

    -
    -

    -Using the canonicalizer

    -

    The canonicalizer is available through the canonicalize argument of the $format() method of the CmdStanModel class. The arguments accepts TRUE and FALSE values, in which case all or none of the features of the canonicalizer are used. It can also accept a list of character vectors that determine which features of the canonicalizer to use.

    -

    The canonincalizer in CmdStan 2.29.2 supports four features: parentheses, braces, includes and deprecations. The parentheses and braces features clean up the use of parentheses and braces, while includes will replace #include statements with the code from the included files. See the canonicalizer section of the Stan User’s Guide for more details.

    -

    In this vignette we will be using the deprecations feature that replaces deprecated Stan model features with non-deprecated ones if possible.

    +
    +

    Using the canonicalizer +

    +

    The canonicalizer is available through the canonicalize +argument of the $format() method of the +CmdStanModel class. The argument accepts TRUE +and FALSE values, in which case all or none of the features +of the canonicalizer are used. It can also accept a list of character +vectors that determine which features of the canonicalizer to use.

    +

    The canonicalizer in CmdStan 2.29.2 supports four features: +parentheses, braces, includes and +deprecations. The parentheses and +braces features clean up the use of parentheses and braces, +while includes will replace #include +statements with the code from the included files. See the canonicalizer +section of the Stan User’s Guide for more details.

    +

    In this vignette we will be using the deprecations +feature that replaces deprecated Stan model features with non-deprecated +ones if possible.

    -mod$format(canonicalize = list("deprecations"))
    +mod$format(canonicalize = list("deprecations"))
    data {
       int<lower=1> k;
       int<lower=0> n;
    @@ -219,15 +252,25 @@ 

    y ~ bernoulli_logit(X * beta + alpha); }

    -

    By default, the format function will print the resulting model code. We can see that all three issues were resolved. y is now defined using the new array keyword, the comment uses // and the std_normal_log() is replaced with std_normal_lpdf().

    -

    You can also use the $format() method to write the updated version of the model directly to the Stan model file. That can be enabled by setting overwrite_file = TRUE. The previous version of the file will automatically be backed up to a file with the .stan.bak suffix. If that is not desired or you are using a version system and making a backup is redundant, you can disable it by setting backup = FALSE.

    +

    By default, the format function will print the resulting model code. +We can see that all three issues were resolved. y is now +defined using the new array keyword, the comment uses // +and the std_normal_log() is replaced with +std_normal_lpdf().

    +

    You can also use the $format() method to write the +updated version of the model directly to the Stan model file. That can +be enabled by setting overwrite_file = TRUE. The previous +version of the file will automatically be backed up to a file with the +.stan.bak suffix. If that is not desired or you are using a +version control system and making a backup is redundant, you can disable it by +setting backup = FALSE.

    -mod$format(
    -    canonicalize = list("deprecations"),
    -    overwrite_file = TRUE,
    -    backup = FALSE
    -)
    -mod$print()
    +mod$format( + canonicalize = list("deprecations"), + overwrite_file = TRUE, + backup = FALSE +) +mod$print()
    data {
       int<lower=1> k;
       int<lower=0> n;
    @@ -259,11 +302,13 @@ 

    -

    Site built with pkgdown 1.6.1.

    +

    +

    Site built with pkgdown 2.0.7.

    @@ -272,5 +317,7 @@

    + + diff --git a/docs/articles/index.html b/docs/articles/index.html index 4daf22f4d..dfb4bb983 100644 --- a/docs/articles/index.html +++ b/docs/articles/index.html @@ -1,74 +1,12 @@ - - - - - - - -Articles • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Articles • cmdstanr - - - - + + -
    -
    - -
    - -
    +

    More details

    -

    More information about compilation, passing in data, how CmdStan ouput is written to CSV and read back into R, profiling Stan programs, running Stan on GPUs, and using CmdStanR in R Markdown documents.

    - -
    -
    How does CmdStanR work?
    -
    -
    R Markdown CmdStan Engine
    -
    -
    Handling deprecated Stan features with the canonicalizer in CmdStanR
    -
    -
    Profiling Stan programs with CmdStanR
    -
    -
    Running Stan on the GPU with OpenCL
    -
    -
    -
    +

    More information about compilation, passing in data, how CmdStan output is written to CSV and read back into R, profiling Stan programs, running Stan on GPUs, and using CmdStanR in R Markdown documents.

    + +
    How does CmdStanR work?
    +
    +
    Working with Posteriors
    +
    +
    R Markdown CmdStan Engine
    +
    +
    Handling deprecated Stan features with the canonicalizer in CmdStanR
    +
    +
    Profiling Stan programs with CmdStanR
    +
    +
    Running Stan on the GPU with OpenCL
    +
    +
    -
    - +
    - - + + diff --git a/docs/articles/posterior.html b/docs/articles/posterior.html new file mode 100644 index 000000000..0e0439f75 --- /dev/null +++ b/docs/articles/posterior.html @@ -0,0 +1,309 @@ + + + + + + + +Working with Posteriors • cmdstanr + + + + + + + + + + + + + + + + + + + +
    +
    + + + + +
    +
    + + + + + +
    +

    Summary +

    +

    We can easily customise the summary statistics reported by +$summary() and $print().

    +
    +fit <- cmdstanr::cmdstanr_example("schools", method = "sample")
    +fit$summary()
    +
    Warning: 130 of 4000 (3.0%) transitions ended with a divergence.
    +See https://mc-stan.org/misc/warnings for details.
    +
       variable  mean median  sd mad     q5 q95 rhat ess_bulk ess_tail
    +1      lp__ -58.9  -59.2 5.0 5.1 -66.97 -50    1      224       84
    +2        mu   6.6    6.7 4.3 4.3  -0.55  14    1      394      115
    +3       tau   5.8    5.0 3.7 3.4   1.33  13    1      223       92
    +4  theta[1]   9.7    9.0 7.3 6.3  -0.99  23    1     1066     2034
    +5  theta[2]   7.0    6.8 5.9 5.6  -2.54  16    1      900     2321
    +6  theta[3]   5.5    5.8 6.9 6.1  -6.25  16    1      841     2201
    +7  theta[4]   6.8    6.9 6.2 6.0  -3.13  17    1      781     2193
    +8  theta[5]   4.6    5.0 6.0 5.8  -5.63  14    1      513      940
    +9  theta[6]   5.5    5.7 6.3 5.6  -5.61  15    1      782     1784
    +10 theta[7]   9.5    9.1 6.2 5.8   0.14  20    1      882     2164
    +11 theta[8]   7.1    7.0 7.3 6.3  -4.80  18    1      976     2151
    +

    By default all variables are summarised with the following functions:

    + +
    [1] "mean"      "median"    "sd"        "mad"       "quantile2"
    +

    To change the variables summarised, we use the variables argument

    +
    +fit$summary(variables = c("mu", "tau"))
    +
      variable mean median  sd mad    q5 q95 rhat ess_bulk ess_tail
    +1       mu  6.6    6.7 4.3 4.3 -0.55  14    1      394      115
    +2      tau  5.8    5.0 3.7 3.4  1.33  13    1      223       92
    +

    We can additionally change which functions are used

    +
    +fit$summary(variables = c("mu", "tau"), mean, sd)
    +
      variable mean  sd
    +1       mu  6.6 4.3
    +2      tau  5.8 3.7
    +

    To summarise all variables with non-default functions, it is +necessary to explicitly set the variables argument, either to +NULL or the full vector of variable names.

    +
    +fit$metadata()$model_params
    +fit$summary(variables = NULL, "mean", "median")
    +
     [1] "lp__"     "mu"       "tau"      "theta[1]" "theta[2]" "theta[3]"
    + [7] "theta[4]" "theta[5]" "theta[6]" "theta[7]" "theta[8]"
    +
       variable  mean median
    +1      lp__ -58.9  -59.2
    +2        mu   6.6    6.7
    +3       tau   5.8    5.0
    +4  theta[1]   9.7    9.0
    +5  theta[2]   7.0    6.8
    +6  theta[3]   5.5    5.8
    +7  theta[4]   6.8    6.9
    +8  theta[5]   4.6    5.0
    +9  theta[6]   5.5    5.7
    +10 theta[7]   9.5    9.1
    +11 theta[8]   7.1    7.0
    +

    Summary functions can be specified by character string, function, or +using a formula (or anything else supported by rlang::as_function()). If +these arguments are named, those names will be used in the tibble +output. If the summary results are named they will take precedence.

    +
    +my_sd <- function(x) c(My_SD = sd(x))
    +fit$summary(
    +  c("mu", "tau"), 
    +  MEAN = mean, 
    +  "median",
    +  my_sd,
    +  ~quantile(.x, probs = c(0.1, 0.9)),
    +  Minimum = function(x) min(x)
    +)        
    +
      variable MEAN median My_SD  10% 90% Minimum
    +1       mu  6.6    6.7   4.3 0.98  12   -11.7
    +2      tau  5.8    5.0   3.7 1.81  11     0.9
    +

    Arguments to all summary functions can also be specified with +.args.

    +
    +fit$summary(c("mu", "tau"), quantile, .args = list(probs = c(0.025, .05, .95, .975)))
    +
      variable 2.5%    5% 95% 97.5%
    +1       mu -2.0 -0.55  14    15
    +2      tau  1.1  1.33  13    15
    +

    The summary functions are applied to the array of sample values, with +dimension iter_sampling x chains.

    +
    +fit$summary(variables = NULL, dim, colMeans)
    +
       variable dim.1 dim.2     1     2     3     4
    +1      lp__  1000     4 -58.8 -58.4 -59.0 -59.4
    +2        mu  1000     4   6.8   6.7   6.6   6.1
    +3       tau  1000     4   5.7   5.6   5.7   6.1
    +4  theta[1]  1000     4   9.9   9.5   9.8   9.5
    +5  theta[2]  1000     4   7.4   7.2   7.0   6.3
    +6  theta[3]  1000     4   5.8   5.7   5.6   4.8
    +7  theta[4]  1000     4   6.9   6.7   7.0   6.7
    +8  theta[5]  1000     4   4.9   4.8   4.6   4.1
    +9  theta[6]  1000     4   5.7   5.8   5.6   4.8
    +10 theta[7]  1000     4   9.6   9.8   9.4   9.2
    +11 theta[8]  1000     4   7.0   7.3   7.0   7.0
    +

    For this reason users may have unexpected results if they use +stats::var() directly, as it will return a covariance +matrix. An alternative is the distributional::variance() +function, which can also be accessed via +posterior::variance().

    +
    +fit$summary(c("mu", "tau"), posterior::variance, ~var(as.vector(.x)))
    +
      variable posterior::variance ~var(as.vector(.x))
    +1       mu                  19                  19
    +2      tau                  14                  14
    +

    Summary functions need not be numeric, but these won’t work with +$print().

    +
    +strict_pos <- function(x) if (all(x > 0)) "yes" else "no"
    +fit$summary(variables = NULL, "Strictly Positive" = strict_pos)
    +# fit$print(variables = NULL, "Strictly Positive" = strict_pos)
    +
       variable Strictly Positive
    +1      lp__                no
    +2        mu                no
    +3       tau               yes
    +4  theta[1]                no
    +5  theta[2]                no
    +6  theta[3]                no
    +7  theta[4]                no
    +8  theta[5]                no
    +9  theta[6]                no
    +10 theta[7]                no
    +11 theta[8]                no
    +

    For more information, see posterior::summarise_draws(), +which is called by $summary().

    +
    +
    + + + +
    + + + +
    + +
    +

    +

    Site built with pkgdown 2.0.7.

    +
    + +
    +
    + + + + + + + + diff --git a/docs/articles/posterior_files/header-attrs-2.18/header-attrs.js b/docs/articles/posterior_files/header-attrs-2.18/header-attrs.js new file mode 100644 index 000000000..dd57d92e0 --- /dev/null +++ b/docs/articles/posterior_files/header-attrs-2.18/header-attrs.js @@ -0,0 +1,12 @@ +// Pandoc 2.9 adds attributes on both header and div. We remove the former (to +// be compatible with the behavior of Pandoc < 2.8). +document.addEventListener('DOMContentLoaded', function(e) { + var hs = document.querySelectorAll("div.section[class*='level'] > :first-child"); + var i, h, a; + for (i = 0; i < hs.length; i++) { + h = hs[i]; + if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6 + a = h.attributes; + while (a.length > 0) h.removeAttribute(a[0].name); + } +}); diff --git a/docs/articles/profiling.html b/docs/articles/profiling.html index 58cce66d3..0bb97c2aa 100644 --- a/docs/articles/profiling.html +++ b/docs/articles/profiling.html @@ -26,6 +26,8 @@ + +
    +
    -
    -

    -Introduction

    -

    This vignette demonstrates how to use the new profiling functionality introduced in CmdStan 2.26.0.

    -

    Profiling identifies which parts of a Stan program are taking the longest time to run and is therefore a useful guide when working on optimizing the performance of a model.

    -

    However, be aware that the statistical assumptions that go into a model are the most important factors in overall model performance. It is often not possible to make up for model problems with just brute force computation. For ideas on how to address performance of your model from a statistical perspective, see Gelman (2020).

    +
    +

    Introduction +

    +

    This vignette demonstrates how to use the new profiling functionality +introduced in CmdStan 2.26.0.

    +

    Profiling identifies which parts of a Stan program are taking the +longest time to run and is therefore a useful guide when working on +optimizing the performance of a model.

    +

    However, be aware that the statistical assumptions that go into a +model are the most important factors in overall model performance. It is +often not possible to make up for model problems with just brute force +computation. For ideas on how to address performance of your model from +a statistical perspective, see Gelman (2020).

    -library(cmdstanr)
    -check_cmdstan_toolchain(fix = TRUE, quiet = TRUE)
    +library(cmdstanr) +check_cmdstan_toolchain(fix = TRUE, quiet = TRUE)

    -
    -

    -Adding profiling statements to a Stan program

    -

    Consider a simple logistic regression with parameters alpha and beta, covariates X, and outcome y.

    +
    +

    Adding profiling statements to a Stan program +

    +

    Consider a simple logistic regression with parameters +alpha and beta, covariates X, and +outcome y.

    data {
       int<lower=1> k;
       int<lower=0> n;
       matrix[n, k] X;
    -  int y[n];
    +  array[n] int y;
     }
     parameters {
       vector[k] beta;
    @@ -169,7 +181,9 @@ 

    y ~ bernoulli_logit(X * beta + alpha); }

    -

    A simple question is how much time do the prior calculations take compared against the likelihood? To answer this we surround the prior and likelihood calculations with profile statements.

    +

    A simple question is how much time do the prior calculations take +compared against the likelihood? To answer this we surround the prior +and likelihood calculations with profile statements.

    profile("priors") {
       target += std_normal_lpdf(beta);
       target += std_normal_lpdf(alpha);
    @@ -177,132 +191,163 @@ 

    profile("likelihood") { target += bernoulli_logit_lpmf(y | X * beta + alpha); }

    -

    In general we recommend using a separate .stan file, but for convenience in this vignette we’ll write the Stan program as a string and use write_stan_file() to write it to a temporary file.

    +

    In general we recommend using a separate .stan file, but +for convenience in this vignette we’ll write the Stan program as a +string and use write_stan_file() to write it to a temporary +file.

    -profiling_bernoulli_logit <- write_stan_file('
    -data {
    -  int<lower=1> k;
    -  int<lower=0> n;
    -  matrix[n, k] X;
    -  int y[n];
    -}
    -parameters {
    -  vector[k] beta;
    -  real alpha;
    -}
    -model {
    -  profile("priors") {
    -    target += std_normal_lpdf(beta);
    -    target += std_normal_lpdf(alpha);
    -  }
    -  profile("likelihood") {
    -    target += bernoulli_logit_lpmf(y | X * beta + alpha);
    -  }
    -}
    -')
    -

    We can then run the model as usual and Stan will collect the profiling information for any sections with profile statements.

    +profiling_bernoulli_logit <- write_stan_file(' +data { + int<lower=1> k; + int<lower=0> n; + matrix[n, k] X; + array[n] int y; +} +parameters { + vector[k] beta; + real alpha; +} +model { + profile("priors") { + target += std_normal_lpdf(beta); + target += std_normal_lpdf(alpha); + } + profile("likelihood") { + target += bernoulli_logit_lpmf(y | X * beta + alpha); + } +} +')
    +

    We can then run the model as usual and Stan will collect the +profiling information for any sections with profile +statements.

    -# Compile the model
    -model <- cmdstan_model(profiling_bernoulli_logit)
    -
    -# Generate some fake data
    -n <- 1000
    -k <- 20
    -X <- matrix(rnorm(n * k), ncol = k)
    -
    -y <- 3 * X[,1] - 2 * X[,2] + 1
    -p <- runif(n)
    -y <- ifelse(p < (1 / (1 + exp(-y))), 1, 0)
    -stan_data <- list(k = ncol(X), n = nrow(X), y = y, X = X)
    -
    -# Run one chain of the model
    -fit <- model$sample(data = stan_data, chains = 1)
    +# Compile the model +model <- cmdstan_model(profiling_bernoulli_logit) + +# Generate some fake data +n <- 1000 +k <- 20 +X <- matrix(rnorm(n * k), ncol = k) + +y <- 3 * X[,1] - 2 * X[,2] + 1 +p <- runif(n) +y <- ifelse(p < (1 / (1 + exp(-y))), 1, 0) +stan_data <- list(k = ncol(X), n = nrow(X), y = y, X = X) + +# Run one chain of the model +fit <- model$sample(data = stan_data, chains = 1)
    -
    -

    -Accessing the profiling information from R

    -

    The raw profiling information can then be accessed with the $profiles() method, which returns a list containing one data frame per chain (profiles across multiple chains are not automatically aggregated). Details on the column names are available in the CmdStan documentation.

    +
    +

    Accessing the profiling information from R +

    +

    The raw profiling information can then be accessed with the +$profiles() method, which returns a list containing one +data frame per chain (profiles across multiple chains are not +automatically aggregated). Details on the column names are available in +the CmdStan +documentation.

    -fit$profiles()
    +fit$profiles()
    [[1]]
             name   thread_id total_time forward_time reverse_time chain_stack
    -1 likelihood 0x107702e00 0.79556800   0.64104100   0.15452700       52380
    -2     priors 0x107702e00 0.00536866   0.00314377   0.00222489       34920
    +1 likelihood 0x102122e00 0.71089600   0.58314100    0.1277560       51969
    +2     priors 0x102122e00 0.00482875   0.00293865    0.0018901       34646
       no_chain_stack autodiff_calls no_autodiff_calls
    -1       34920000          17460                 1
    -2              0          17460                 1
    -

    The total_time column is the total time spent inside a given profile statement. It is clear that the vast majority of time is spent in the likelihood function.

    +1 34646000 17323 1 +2 0 17323 1
    +

    The total_time column is the total time spent inside a +given profile statement. It is clear that the vast majority of time is +spent in the likelihood function.

    -
    -

    -Comparing to a faster version of the model

    -

    Stan’s specialized glm functions can be used to make models like this faster. In this case the likelihood can be replaced with

    +
    +

    Comparing to a faster version of the model +

    +

    Stan’s specialized glm functions can be used to make models like this +faster. In this case the likelihood can be replaced with

    target += bernoulli_logit_glm_lpmf(y | X, alpha, beta);
    -

    We’ll keep the same profile() statements so that the profiling information for the new model is collected automatically just like for the previous one.

    +

    We’ll keep the same profile() statements so that the +profiling information for the new model is collected automatically just +like for the previous one.

    -profiling_bernoulli_logit_glm <- write_stan_file('
    -data {
    -  int<lower=1> k;
    -  int<lower=0> n;
    -  matrix[n, k] X;
    -  int y[n];
    -}
    -parameters {
    -  vector[k] beta;
    -  real alpha;
    -}
    -model {
    -  profile("priors") {
    -    target += std_normal_lpdf(beta);
    -    target += std_normal_lpdf(alpha);
    -  }
    -  profile("likelihood") {
    -    target += bernoulli_logit_glm_lpmf(y | X, alpha, beta);
    -  }
    -}
    -')
    +profiling_bernoulli_logit_glm <- write_stan_file(' +data { + int<lower=1> k; + int<lower=0> n; + matrix[n, k] X; + array[n] int y; +} +parameters { + vector[k] beta; + real alpha; +} +model { + profile("priors") { + target += std_normal_lpdf(beta); + target += std_normal_lpdf(alpha); + } + profile("likelihood") { + target += bernoulli_logit_glm_lpmf(y | X, alpha, beta); + } +} +')
    -model_glm <- cmdstan_model(profiling_bernoulli_logit_glm)
    -fit_glm <- model_glm$sample(data = stan_data, chains = 1)
    +model_glm <- cmdstan_model(profiling_bernoulli_logit_glm) +fit_glm <- model_glm$sample(data = stan_data, chains = 1)
    -fit_glm$profiles()
    +fit_glm$profiles()
    [[1]]
             name   thread_id total_time forward_time reverse_time chain_stack
    -1 likelihood 0x10550ce00 0.44504100   0.44354000   0.00150062       17161
    -2     priors 0x10550ce00 0.00457269   0.00296803   0.00160466       34322
    +1 likelihood 0x1066bde00 0.45516500   0.45357200   0.00159287       17695
    +2     priors 0x1066bde00 0.00399743   0.00242302   0.00157441       35390
       no_chain_stack autodiff_calls no_autodiff_calls
    -1              0          17161                 1
    -2              0          17161                 1
    -

    We can see from the total_time column that this is much faster than the previous model.

    +1 0 17695 1 +2 0 17695 1
    +

    We can see from the total_time column that this is much +faster than the previous model.

    -
    -

    -Per-gradient timings, and memory usage

    -

    The other columns of the profiling output are documented in the CmdStan documentation.

    -

    The timing numbers are broken down by forward pass and reverse pass, and the chain_stack and no_chain_stack columns contain information about how many autodiff variables were saved in the process of performing a calculation.

    -

    These numbers are all totals – times are the total times over the whole calculation, and chain_stack counts are similarly the total counts of autodiff variables used over the whole calculation. It is often convenient to have per-gradient calculations (which will be more stable across runs with different seeds). To compute these, use the autodiff_calls column.

    +
    +

    Per-gradient timings, and memory usage +

    +

    The other columns of the profiling output are documented in the CmdStan +documentation.

    +

    The timing numbers are broken down by forward pass and reverse pass, +and the chain_stack and no_chain_stack columns +contain information about how many autodiff variables were saved in the +process of performing a calculation.

    +

    These numbers are all totals – times are the total times over the +whole calculation, and chain_stack counts are similarly the +total counts of autodiff variables used over the whole calculation. It +is often convenient to have per-gradient calculations (which will be +more stable across runs with different seeds). To compute these, use the +autodiff_calls column.

    -profile_chain_1 <- fit$profiles()[[1]]
    -per_gradient_timing <- profile_chain_1$total_time/profile_chain_1$autodiff_calls
    -print(per_gradient_timing) # two elements for the two profile statements in the model
    -
    [1] 4.556518e-05 3.074834e-07
    +profile_chain_1 <- fit$profiles()[[1]] +per_gradient_timing <- profile_chain_1$total_time/profile_chain_1$autodiff_calls +print(per_gradient_timing) # two elements for the two profile statements in the model
    +
    [1] 4.103770e-05 2.787479e-07
    -
    -

    -Accessing and saving the profile files

    -

    After sampling (or optimization or variational inference) finishes, CmdStan stores the profiling data in CSV files in a temporary location. The paths of the profiling CSV files can be retrieved using $profile_files().

    +
    +

    Accessing and saving the profile files +

    +

    After sampling (or optimization or variational inference) finishes, +CmdStan stores the profiling data in CSV files in a temporary location. +The paths of the profiling CSV files can be retrieved using +$profile_files().

    -fit$profile_files()
    -
    [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpsMj0TM/model_20fabfb2aee52e6a18d30460e35ae184-profile-202203181228-1-107daa.csv"
    -

    These can be saved to a more permanent location with the $save_profile_files() method.

    +fit$profile_files()
    +
    [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmppMVUoY/model_6580008f67848265f3dfd0e7ae3b0600-profile-202307251456-1-5100ca.csv"
    +

    These can be saved to a more permanent location with the +$save_profile_files() method.

    -# see ?save_profile_files for info on optional arguments
    -fit$save_profile_files(dir = "path/to/directory")
    +# see ?save_profile_files for info on optional arguments +fit$save_profile_files(dir = "path/to/directory")
    -
    -

    -References

    -

    Gelman, Andrew, Aki Vehtari, Daniel Simpson, Charles C. Margossian, Bob Carpenter, Yuling Yao, Lauren Kennedy, Jonah Gabry, Paul-Christian Bürkner, and Martin Modrák. 2020. “Bayesian Workflow.” https://arxiv.org/abs/2011.01808.

    +
    +

    References +

    +

    Gelman, Andrew, Aki Vehtari, Daniel Simpson, Charles C. Margossian, +Bob Carpenter, Yuling Yao, Lauren Kennedy, Jonah Gabry, Paul-Christian +Bürkner, and Martin Modrák. 2020. “Bayesian Workflow.” https://arxiv.org/abs/2011.01808.

    @@ -317,11 +362,13 @@

    -

    Site built with pkgdown 1.6.1.

    +

    +

    Site built with pkgdown 2.0.7.

    @@ -330,5 +377,7 @@

    + + diff --git a/docs/articles/r-markdown.html b/docs/articles/r-markdown.html index fe246b59b..eeb298b19 100644 --- a/docs/articles/r-markdown.html +++ b/docs/articles/r-markdown.html @@ -26,6 +26,8 @@ + +
    +
    -

    R Markdown supports a variety of languages through the use of knitr language engines. One such engine is the stan engine, which allows users to write Stan programs directly in their R Markdown documents by setting the language of the chunk to stan.

    -

    Behind the scenes, the engine relies on RStan to compile the model code into an in-memory stanmodel, which is assigned to a variable with the name given by the output.var chunk option. For example:

    -
    ```{stan, output.var="model"}
    -// Stan model code
    -```
    -
    -```{r}
    -rstan::sampling(model)
    -```
    -

    CmdStanR provides a replacement engine, which can be registered as follows:

    +

    R Markdown supports a variety of languages through the use of knitr +language engines. One such engine is the stan engine, which +allows users to write Stan programs directly in their R Markdown +documents by setting the language of the chunk to stan.

    +

    Behind the scenes, the engine relies on RStan to compile the model +code into an in-memory stanmodel, which is assigned to a +variable with the name given by the output.var chunk +option. For example:

    +
    ```{stan, output.var="model"}
    +// Stan model code
    +```
    +
    +```{r}
    +rstan::sampling(model)
    +```
    +

    CmdStanR provides a replacement engine, which can be registered as +follows:

    -

    By default, this overrides knitr’s built-in stan engine so that all stan chunks are processed with CmdStanR, not RStan. Of course, this also means that the variable specified by output.var will no longer be a stanmodel object, but instead a CmdStanModel object, so the code above would look like this:

    -
    ```{stan, output.var="model"}
    -// Stan model code
    -```
    -
    -```{r}
    -model$sample()
    -```
    -
    -

    -Example

    -
    // This stan chunk results in a CmdStanModel object called "ex1"
    -parameters {
    -  array[2] real y;
    -}
    -model {
    -  y[1] ~ normal(0, 1);
    -  y[2] ~ double_exponential(0, 2);
    -}
    +library(cmdstanr) +check_cmdstan_toolchain(fix = TRUE, quiet = TRUE) + +register_knitr_engine()
    +

    By default, this overrides knitr’s built-in stan engine +so that all stan chunks are processed with CmdStanR, not +RStan. Of course, this also means that the variable specified by +output.var will no longer be a stanmodel +object, but instead a CmdStanModel object, so the code +above would look like this:

    +
    ```{stan, output.var="model"}
    +// Stan model code
    +```
    +
    +```{r}
    +model$sample()
    +```
    +
    +

    Example +

    +
    // This stan chunk results in a CmdStanModel object called "ex1"
    +parameters {
    +  array[2] real y;
    +}
    +model {
    +  y[1] ~ normal(0, 1);
    +  y[2] ~ double_exponential(0, 2);
    +}
    -ex1$print()
    -#> // This stan chunk results in a CmdStanModel object called "ex1"
    -#> parameters {
    -#>   array[2] real y;
    -#> }
    -#> model {
    -#>   y[1] ~ normal(0, 1);
    -#>   y[2] ~ double_exponential(0, 2);
    -#> }
    +ex1$print() +#> // This stan chunk results in a CmdStanModel object called "ex1" +#> parameters { +#> array[2] real y; +#> } +#> model { +#> y[1] ~ normal(0, 1); +#> y[2] ~ double_exponential(0, 2); +#> }
    -fit <- ex1$sample(
    -  refresh = 0,
    -  seed = 42L
    -)
    -#> Running MCMC with 4 sequential chains...
    -#> 
    -#> Chain 1 finished in 0.0 seconds.
    -#> Chain 2 finished in 0.0 seconds.
    -#> Chain 3 finished in 0.0 seconds.
    -#> Chain 4 finished in 0.0 seconds.
    -#> 
    -#> All 4 chains finished successfully.
    -#> Mean chain execution time: 0.0 seconds.
    -#> Total execution time: 0.6 seconds.
    -
    -print(fit)
    -#>  variable  mean median   sd  mad    q5   q95 rhat ess_bulk ess_tail
    -#>      lp__ -1.50  -1.17 1.24 0.96 -3.94 -0.18 1.00     1304     1536
    -#>      y[1] -0.01  -0.01 0.99 0.99 -1.67  1.60 1.00     1993     2262
    -#>      y[2] -0.07  -0.04 2.90 2.05 -4.79  4.54 1.00     2050     1420
    +fit <- ex1$sample( + refresh = 0, + seed = 42L +) +#> Running MCMC with 4 sequential chains... +#> +#> Chain 1 finished in 0.0 seconds. +#> Chain 2 finished in 0.0 seconds. +#> Chain 3 finished in 0.0 seconds. +#> Chain 4 finished in 0.0 seconds. +#> +#> All 4 chains finished successfully. +#> Mean chain execution time: 0.0 seconds. +#> Total execution time: 0.7 seconds. + +print(fit) +#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail +#> lp__ -1.50 -1.17 1.24 0.96 -3.94 -0.18 1.00 1304 1536 +#> y[1] -0.01 -0.01 0.99 0.99 -1.67 1.60 1.00 1993 2262 +#> y[2] -0.07 -0.04 2.90 2.05 -4.79 4.54 1.00 2050 1420
    -
    -

    -Caching chunks

    -

    Use cache=TRUE chunk option to avoid re-compiling the Stan model code every time the R Markdown is knit/rendered.

    -

    You can find the Stan model file and the compiled executable in the document’s cache directory.

    +
    +

    Caching chunks +

    +

    Use the cache=TRUE chunk option to avoid re-compiling the +Stan model code every time the R Markdown document is knit/rendered.

    +

    You can find the Stan model file and the compiled executable in the +document’s cache directory.

    -
    -

    -Using RStan and CmdStanR engines side-by-side

    -

    While the default behavior is to override the built-in stan engine because the assumption is that the user is probably not using both RStan and CmdStanR in the same document or project, the option to use both exists. When registering CmdStanR’s knitr engine, set override = FALSE to register the engine as a cmdstan engine:

    +
    +

    Using RStan and CmdStanR engines side-by-side +

    +

    While the default behavior is to override the built-in +stan engine because the assumption is that the user is +probably not using both RStan and CmdStanR in the same document or +project, the option to use both exists. When registering CmdStanR’s +knitr engine, set override = FALSE to register the engine +as a cmdstan engine:

    -register_knitr_engine(override = FALSE)
    -

    This will cause stan chunks to be processed by knitr’s built-in, RStan-based engine and only use CmdStanR’s knitr engine for cmdstan chunks:

    -
    ```{stan, output.var="model_obj1"}
    -// Results in a stanmodel object from RStan
    -```
    -
    -```{r}
    -rstan::sampling(model_obj1)
    -```
    -
    -```{cmdstan, output.var="model_obj2"}
    -// Results in a CmdStanModel object from CmdStanR
    -```
    -
    -```{r}
    -model_obj2$sample()
    -```
    +register_knitr_engine(override = FALSE)
    +

    This will cause stan chunks to be processed by knitr’s +built-in, RStan-based engine and only use CmdStanR’s knitr engine for +cmdstan chunks:

    +
    ```{stan, output.var="model_obj1"}
    +// Results in a stanmodel object from RStan
    +```
    +
    +```{r}
    +rstan::sampling(model_obj1)
    +```
    +
    +```{cmdstan, output.var="model_obj2"}
    +// Results in a CmdStanModel object from CmdStanR
    +```
    +
    +```{r}
    +model_obj2$sample()
    +```
    -
    -

    -Running interactively

    -

    When running chunks interactively in RStudio (e.g. when using R Notebooks), it has been observed that the built-in, RStan-based engine is used for stan chunks even when CmdStanR’s engine has been registered in the session as the engine for stan. As a workaround, when running chunks interactively, it is recommended to use the override = FALSE option and change stan chunks to be cmdstan chunks.

    -

    Do not worry: if the template you use supports syntax highlighting for the Stan language, that syntax highlighting will be applied to cmdstan chunks when the document is knit/rendered.

    +
    +

    Running interactively +

    +

    When running chunks interactively in RStudio (e.g. when using R +Notebooks), it has been observed that the built-in, RStan-based +engine is used for stan chunks even when CmdStanR’s engine +has been registered in the session as the engine for stan. +As a workaround, when running chunks interactively, it is +recommended to use the override = FALSE option and change +stan chunks to be cmdstan chunks.

    +

    Do not worry: if the template you use supports syntax highlighting +for the Stan language, that syntax highlighting will be applied to +cmdstan chunks when the document is knit/rendered.

    @@ -253,11 +285,13 @@

    -

    Site built with pkgdown 1.6.1.

    +

    +

    Site built with pkgdown 2.0.7.

    @@ -266,5 +300,7 @@

    + + diff --git a/docs/authors.html b/docs/authors.html index c44f2bc06..be12ad2f9 100644 --- a/docs/authors.html +++ b/docs/authors.html @@ -1,74 +1,12 @@ - - - - - - - -Authors • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Authors and Citation • cmdstanr - - + + - - - -
    -
    -
    - -
    +
    - @@ -213,22 +165,20 @@

    Authors

    -
    - +
    - - + + diff --git a/docs/index.html b/docs/index.html index 079d4d868..c14b85fd5 100644 --- a/docs/index.html +++ b/docs/index.html @@ -19,11 +19,11 @@ + The CmdStanR interface is an alternative to RStan that calls the command + line interface for compilation and running algorithms instead of interfacing + with C++ via Rcpp. This has many benefits including always being compatible + with the latest version of Stan, fewer installation errors, fewer unexpected + crashes in RStudio, and a more permissive license."> + +
    -
    - - - + + diff --git a/docs/reference/fit-method-grad_log_prob.html b/docs/reference/fit-method-grad_log_prob.html new file mode 100644 index 000000000..9b8626521 --- /dev/null +++ b/docs/reference/fit-method-grad_log_prob.html @@ -0,0 +1,173 @@ + +Calculate the log-probability and the gradient w.r.t. each input for a +given vector of unconstrained parameters — fit-method-grad_log_prob • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $grad_log_prob() method provides access to the +Stan model's log_prob function and its derivative

    +
    + +
    +
    grad_log_prob(unconstrained_variables, jacobian_adjustment = TRUE)
    +
    + +
    +

    Arguments

    +
    unconstrained_variables
    +

    (numeric) A vector of unconstrained parameters +to be passed to grad_log_prob.

    + + +
    jacobian_adjustment
    +

    (bool) Whether to include the log-density +adjustments from un/constraining variables.

    + +
    + + +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$init_model_methods()
    +#> Error: Model methods cannot be used with a pre-compiled Stan executable, the model must be compiled again
    +fit_mcmc$grad_log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2))
    +#> Error: The method has not been compiled, please call `init_model_methods()` first
    +# }
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fit-method-gradients.html b/docs/reference/fit-method-gradients.html index 06ad91dfa..e022df320 100644 --- a/docs/reference/fit-method-gradients.html +++ b/docs/reference/fit-method-gradients.html @@ -1,76 +1,13 @@ - - - - - - - -Extract gradients after diagnostic mode — fit-method-gradients • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract gradients after diagnostic mode — fit-method-gradients • cmdstanr - - + + - - -
    -
    - -
    - -
    +
    @@ -181,54 +109,58 @@

    Extract gradients after diagnostic mode

    parameters.

    -
    gradients()
    - - -

    Value

    - -

    A list of lists. See Examples.

    -

    See also

    - - +
    +
    gradients()
    +
    -

    Examples

    -
    # \dontrun{ -test <- cmdstanr_example("logistic", method = "diagnose") +
    +

    Value

    + -# retrieve the gradients -test$gradients() -
    #> param_idx value model finite_diff error -#> 1 0 0.3291950 4.51253 4.51253 -3.28079e-08 -#> 2 1 -0.0351071 -14.28740 -14.28740 3.60592e-09 -#> 3 2 1.7559700 -31.76980 -31.76980 -3.22111e-08 -#> 4 3 1.7963400 -12.81930 -12.81930 -2.77577e-08
    # } +

    A list of lists. See Examples.

    +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +test <- cmdstanr_example("logistic", method = "diagnose")
    +
    +# retrieve the gradients
    +test$gradients()
    +#>   param_idx     value     model finite_diff        error
    +#> 1         0  0.788454  -2.90866    -2.90866  4.89447e-09
    +#> 2         1  0.538042 -17.17950   -17.17950  2.76466e-08
    +#> 3         2 -1.725840  16.99630    16.99630  8.87535e-10
    +#> 4         3  1.854370  -7.63726    -7.63726 -8.95156e-09
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-hessian.html b/docs/reference/fit-method-hessian.html new file mode 100644 index 000000000..d8b35a7ca --- /dev/null +++ b/docs/reference/fit-method-hessian.html @@ -0,0 +1,172 @@ + +Calculate the log-probability, the gradient w.r.t. each input, and the hessian +for a given vector of unconstrained parameters — fit-method-hessian • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $hessian() method provides access to the +Stan model's log_prob, its derivative, and its hessian

    +
    + +
    +
    hessian(unconstrained_variables, jacobian_adjustment = TRUE)
    +
    + +
    +

    Arguments

    +
    unconstrained_variables
    +

    (numeric) A vector of unconstrained parameters +to be passed to hessian.

    + + +
    jacobian_adjustment
    +

    (bool) Whether to include the log-density +adjustments from un/constraining variables.

    + +
    + + +
    +

    Examples

    +
    # \dontrun{
    +# fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +# fit_mcmc$init_model_methods(hessian = TRUE)
    +# fit_mcmc$hessian(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2))
    +# }
    +
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fit-method-init.html b/docs/reference/fit-method-init.html index 2183783ff..e66e48080 100644 --- a/docs/reference/fit-method-init.html +++ b/docs/reference/fit-method-init.html @@ -1,80 +1,17 @@ - - - - - - - -Extract user-specified initial values — fit-method-init • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract user-specified initial values — fit-method-init • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    @@ -189,74 +117,80 @@

    Extract user-specified initial values

    this in the future.

    -
    init()
    - - -

    Value

    - -

    A list of lists. See Examples.

    -

    See also

    +
    +
    init()
    +
    - +
    +

    Value

    + -

    Examples

    -
    # \dontrun{ -init_fun <- function() list(alpha = rnorm(1), beta = rnorm(3)) -fit <- cmdstanr_example("logistic", init = init_fun, chains = 2) -str(fit$init()) -
    #> List of 2 -#> $ :List of 2 -#> ..$ alpha: num 0.0834 -#> ..$ beta : num [1:3] 0.8592 0.0505 2.3089 -#> $ :List of 2 -#> ..$ alpha: num -0.859 -#> ..$ beta : num [1:3] 1.54 -1.61 -1.08
    -# partial inits (only specifying for a subset of parameters) -init_list <- list( - list(mu = 10, tau = 2), - list(mu = -10, tau = 1) -) -fit <- cmdstanr_example("schools_ncp", init = init_list, chains = 2, adapt_delta = 0.9) -
    #> Init values were only set for a subset of parameters. -#> Missing init values for the following parameters: -#> - chain 1: theta_raw -#> - chain 2: theta_raw
    -# only user-specified inits returned -str(fit$init()) -
    #> List of 2 -#> $ :List of 2 -#> ..$ mu : int 10 -#> ..$ tau: int 2 -#> $ :List of 2 -#> ..$ mu : int -10 -#> ..$ tau: int 1
    # } +

    A list of lists. See Examples.

    +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +init_fun <- function() list(alpha = rnorm(1), beta = rnorm(3))
    +fit <- cmdstanr_example("logistic", init = init_fun, chains = 2)
    +str(fit$init())
    +#> List of 2
    +#>  $ :List of 2
    +#>   ..$ alpha: num -1.25
    +#>   ..$ beta : num [1:3] -2.2653 0.0495 0.8638
    +#>  $ :List of 2
    +#>   ..$ alpha: num -0.172
    +#>   ..$ beta : num [1:3] 0.437 0.285 1.047
    +
    +# partial inits (only specifying for a subset of parameters)
    +init_list <- list(
    +  list(mu = 10, tau = 2),
    +  list(mu = -10, tau = 1)
    +)
    +fit <- cmdstanr_example("schools_ncp", init = init_list, chains = 2, adapt_delta = 0.9)
    +#> Init values were only set for a subset of parameters. 
    +#> Missing init values for the following parameters:
    +#>  - chain 1: theta_raw
    +#>  - chain 2: theta_raw
    +
    +# only user-specified inits returned
    +str(fit$init())
    +#> List of 2
    +#>  $ :List of 2
    +#>   ..$ mu : int 10
    +#>   ..$ tau: int 2
    +#>  $ :List of 2
    +#>   ..$ mu : int -10
    +#>   ..$ tau: int 1
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-init_model_methods.html b/docs/reference/fit-method-init_model_methods.html new file mode 100644 index 000000000..437e01d11 --- /dev/null +++ b/docs/reference/fit-method-init_model_methods.html @@ -0,0 +1,180 @@ + +Compile additional methods for accessing the model log-probability function +and parameter constraining and unconstraining. — fit-method-init_model_methods • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $init_model_methods() method compiles and initializes the +log_prob, grad_log_prob, constrain_variables, unconstrain_variables +and unconstrain_draws functions. These are then available as methods of +the fitted model object. This requires the Rcpp package.

    +

    Note: there may be many compiler warnings emitted during compilation but +these can be ignored so long as they are warnings and not errors.

    +
    + +
    +
    init_model_methods(seed = 0, verbose = FALSE, hessian = FALSE)
    +
    + +
    +

    Arguments

    +
    seed
    +

    (integer) The random seed to use when initializing the model.

    + + +
    verbose
    +

    (boolean) Whether to show verbose logging during compilation.

    + + +
    hessian
    +

    (boolean) Whether to expose the (experimental) hessian method.

    + +
    + + +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$init_model_methods()
    +#> Error: Model methods cannot be used with a pre-compiled Stan executable, the model must be compiled again
    +# }
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fit-method-inv_metric.html b/docs/reference/fit-method-inv_metric.html index 1b83c5025..cca5c0ce6 100644 --- a/docs/reference/fit-method-inv_metric.html +++ b/docs/reference/fit-method-inv_metric.html @@ -1,75 +1,12 @@ - - - - - - - -Extract inverse metric (mass matrix) after MCMC — fit-method-inv_metric • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract inverse metric (mass matrix) after MCMC — fit-method-inv_metric • cmdstanr - + + - - - -
    -
    - -
    - -
    +
    @@ -179,128 +107,134 @@

    Extract inverse metric (mass matrix) after MCMC

    Extract the inverse metric (mass matrix) for each MCMC chain.

    -
    inv_metric(matrix = TRUE)
    +
    +
    inv_metric(matrix = TRUE)
    +
    -

    Arguments

    - - - - - - -
    matrix

    (logical) If a diagonal metric was used, setting matrix = FALSE returns a list containing just the diagonals of the matrices instead +

    +

    Arguments

    +
    matrix
    +

    (logical) If a diagonal metric was used, setting matrix = FALSE returns a list containing just the diagonals of the matrices instead of the full matrices. Setting matrix = FALSE has no effect for dense -metrics.

    +metrics.

    -

    Value

    +
    +
    +

    Value

    + -

    A list of length equal to the number of MCMC chains. See the matrix -argument for details.

    -

    See also

    +

    A list of length equal to the number of MCMC chains. See the matrix

    - -

    Examples

    -
    # \dontrun{ -fit <- cmdstanr_example("logistic") -fit$inv_metric() -
    #> $`1` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.0481393 0.00000 0.0000000 0.0000000 -#> [2,] 0.0000000 0.05628 0.0000000 0.0000000 -#> [3,] 0.0000000 0.00000 0.0407205 0.0000000 -#> [4,] 0.0000000 0.00000 0.0000000 0.0702359 -#> -#> $`2` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.0462024 0.0000000 0.0000000 0.0000000 -#> [2,] 0.0000000 0.0708086 0.0000000 0.0000000 -#> [3,] 0.0000000 0.0000000 0.0509179 0.0000000 -#> [4,] 0.0000000 0.0000000 0.0000000 0.0663432 -#> -#> $`3` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.0401421 0.0000000 0.0000000 0.0000000 -#> [2,] 0.0000000 0.0599614 0.0000000 0.0000000 -#> [3,] 0.0000000 0.0000000 0.0409305 0.0000000 -#> [4,] 0.0000000 0.0000000 0.0000000 0.0636495 -#> -#> $`4` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.048948 0.0000000 0.000000 0.0000000 -#> [2,] 0.000000 0.0612541 0.000000 0.0000000 -#> [3,] 0.000000 0.0000000 0.053141 0.0000000 -#> [4,] 0.000000 0.0000000 0.000000 0.0843771 -#>
    fit$inv_metric(matrix=FALSE) -
    #> $`1` -#> [1] 0.0481393 0.0562800 0.0407205 0.0702359 -#> -#> $`2` -#> [1] 0.0462024 0.0708086 0.0509179 0.0663432 -#> -#> $`3` -#> [1] 0.0401421 0.0599614 0.0409305 0.0636495 -#> -#> $`4` -#> [1] 0.0489480 0.0612541 0.0531410 0.0843771 -#>
    -fit <- cmdstanr_example("logistic", metric = "dense_e") -fit$inv_metric() -
    #> $`1` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.04674080 -0.003735660 0.002672980 0.00238423 -#> [2,] -0.00373566 0.068142800 -0.000262148 -0.01289230 -#> [3,] 0.00267298 -0.000262148 0.046002800 -0.01442680 -#> [4,] 0.00238423 -0.012892300 -0.014426800 0.07403270 -#> -#> $`2` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.04818520 -0.00541596 0.00262377 0.00791809 -#> [2,] -0.00541596 0.06098370 -0.00895976 -0.00360963 -#> [3,] 0.00262377 -0.00895976 0.06049940 -0.01483090 -#> [4,] 0.00791809 -0.00360963 -0.01483090 0.06889970 -#> -#> $`3` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.04188870 -0.003366470 0.002556630 0.00143941 -#> [2,] -0.00336647 0.072588600 -0.000749238 -0.01014960 -#> [3,] 0.00255663 -0.000749238 0.062633300 -0.01869750 -#> [4,] 0.00143941 -0.010149600 -0.018697500 0.06842820 -#> -#> $`4` -#> [,1] [,2] [,3] [,4] -#> [1,] 0.050144300 -0.00512010 -0.000121339 0.00550570 -#> [2,] -0.005120100 0.05424960 0.005528330 -0.00520478 -#> [3,] -0.000121339 0.00552833 0.047886100 -0.01343470 -#> [4,] 0.005505700 -0.00520478 -0.013434700 0.07104140 -#>
    # } +

    argument for details.

    +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +fit <- cmdstanr_example("logistic")
    +fit$inv_metric()
    +#> $`1`
    +#>           [,1]      [,2]      [,3]      [,4]
    +#> [1,] 0.0431002 0.0000000 0.0000000 0.0000000
    +#> [2,] 0.0000000 0.0583438 0.0000000 0.0000000
    +#> [3,] 0.0000000 0.0000000 0.0531465 0.0000000
    +#> [4,] 0.0000000 0.0000000 0.0000000 0.0786797
    +#> 
    +#> $`2`
    +#>           [,1]      [,2]      [,3]      [,4]
    +#> [1,] 0.0454991 0.0000000 0.0000000 0.0000000
    +#> [2,] 0.0000000 0.0809962 0.0000000 0.0000000
    +#> [3,] 0.0000000 0.0000000 0.0496686 0.0000000
    +#> [4,] 0.0000000 0.0000000 0.0000000 0.0688616
    +#> 
    +#> $`3`
    +#>           [,1]     [,2]      [,3]      [,4]
    +#> [1,] 0.0406769 0.000000 0.0000000 0.0000000
    +#> [2,] 0.0000000 0.054686 0.0000000 0.0000000
    +#> [3,] 0.0000000 0.000000 0.0544764 0.0000000
    +#> [4,] 0.0000000 0.000000 0.0000000 0.0675343
    +#> 
    +#> $`4`
    +#>           [,1]      [,2]      [,3]      [,4]
    +#> [1,] 0.0434081 0.0000000 0.0000000 0.0000000
    +#> [2,] 0.0000000 0.0669723 0.0000000 0.0000000
    +#> [3,] 0.0000000 0.0000000 0.0466104 0.0000000
    +#> [4,] 0.0000000 0.0000000 0.0000000 0.0654703
    +#> 
    +fit$inv_metric(matrix=FALSE)
    +#> $`1`
    +#> [1] 0.0431002 0.0583438 0.0531465 0.0786797
    +#> 
    +#> $`2`
    +#> [1] 0.0454991 0.0809962 0.0496686 0.0688616
    +#> 
    +#> $`3`
    +#> [1] 0.0406769 0.0546860 0.0544764 0.0675343
    +#> 
    +#> $`4`
    +#> [1] 0.0434081 0.0669723 0.0466104 0.0654703
    +#> 
    +
    +fit <- cmdstanr_example("logistic", metric = "dense_e")
    +fit$inv_metric()
    +#> $`1`
    +#>             [,1]        [,2]        [,3]         [,4]
    +#> [1,] 4.31917e-02  0.00404353  0.00504066  9.28594e-06
    +#> [2,] 4.04353e-03  0.05741780 -0.00262368 -9.31332e-03
    +#> [3,] 5.04066e-03 -0.00262368  0.04816930 -1.40772e-02
    +#> [4,] 9.28594e-06 -0.00931332 -0.01407720  7.29028e-02
    +#> 
    +#> $`2`
    +#>             [,1]        [,2]        [,3]        [,4]
    +#> [1,]  0.05586860 -0.00758105  0.00327655  0.00186063
    +#> [2,] -0.00758105  0.06515060 -0.00821290 -0.00560473
    +#> [3,]  0.00327655 -0.00821290  0.05351870 -0.01152510
    +#> [4,]  0.00186063 -0.00560473 -0.01152510  0.06850340
    +#> 
    +#> $`3`
    +#>             [,1]        [,2]        [,3]        [,4]
    +#> [1,]  0.05023130  0.00119935  0.00439778 -0.00486186
    +#> [2,]  0.00119935  0.06410140 -0.00267351 -0.01118110
    +#> [3,]  0.00439778 -0.00267351  0.04945990 -0.01316990
    +#> [4,] -0.00486186 -0.01118110 -0.01316990  0.07295030
    +#> 
    +#> $`4`
    +#>             [,1]        [,2]        [,3]        [,4]
    +#> [1,]  0.05218850 -0.00108995  0.00814572  0.00609612
    +#> [2,] -0.00108995  0.06646310 -0.00283644 -0.01047740
    +#> [3,]  0.00814572 -0.00283644  0.05347060 -0.01085170
    +#> [4,]  0.00609612 -0.01047740 -0.01085170  0.07278000
    +#> 
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-log_prob.html b/docs/reference/fit-method-log_prob.html new file mode 100644 index 000000000..eb45ac2ab --- /dev/null +++ b/docs/reference/fit-method-log_prob.html @@ -0,0 +1,167 @@ + +Calculate the log-probability given a provided vector of unconstrained parameters. — fit-method-log_prob • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $log_prob() method provides access to the Stan model's log_prob function

    +
    + +
    +
    log_prob(unconstrained_variables, jacobian_adjustment = TRUE)
    +
    + +
    +

    Arguments

    +
    unconstrained_variables
    +

    (numeric) A vector of unconstrained parameters to be passed to log_prob

    + + +
    jacobian_adjustment
    +

    (bool) Whether to include the log-density adjustments from +un/constraining variables

    + +
    + + +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$init_model_methods()
    +#> Error: Model methods cannot be used with a pre-compiled Stan executable, the model must be compiled again
    +fit_mcmc$log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2))
    +#> Error: The method has not been compiled, please call `init_model_methods()` first
    +# }
    +
    +
    +
    +
    + +
    + + +
    + +
    +

    Site built with pkgdown 2.0.7.

    +
    + +
    + + + + + + + + diff --git a/docs/reference/fit-method-loo.html b/docs/reference/fit-method-loo.html index 86fb957a4..30cfdd3db 100644 --- a/docs/reference/fit-method-loo.html +++ b/docs/reference/fit-method-loo.html @@ -1,79 +1,16 @@ - - - - - - - -Leave-one-out cross-validation (LOO-CV) — fit-method-loo • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Leave-one-out cross-validation (LOO-CV) — fit-method-loo • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +

    The $loo() method computes approximate LOO-CV using the -loo package. This is a simple wrapper around loo::loo.array() +loo package. This is a simple wrapper around loo::loo.array() provided for convenience and requires computing the pointwise log-likelihood in your Stan program. See the loo package -vignettes for details.

    +vignettes for details.

    -
    loo(variables = "log_lik", r_eff = TRUE, ...)
    +
    +
    loo(variables = "log_lik", r_eff = TRUE, moment_match = FALSE, ...)
    +
    -

    Arguments

    - - - - - - - - - - - - - - -
    variables

    (character vector) The name(s) of the variable(s) in the +

    +

    Arguments

    +
    variables
    +

    (character vector) The name(s) of the variable(s) in the Stan program containing the pointwise log-likelihood. The default is to look for "log_lik". This argument is passed to the -$draws() method.

    r_eff

    (multiple options) How to handle the r_eff argument for loo():

      -
    • TRUE (the default) will automatically call loo::relative_eff.array() -to compute the r_eff argument to pass to loo::loo.array().

    • +$draws() method.

      + + +
      r_eff
      +

      (multiple options) How to handle the r_eff argument for loo():

      • TRUE (the default) will automatically call loo::relative_eff.array() +to compute the r_eff argument to pass to loo::loo.array().

      • FALSE or NULL will avoid computing r_eff (which can sometimes be slow) but will result in a warning from the loo package.

      • If r_eff is anything else, that object will be passed as the r_eff -argument to loo::loo.array().

      • -
    ...

    Other arguments (e.g., cores, save_psis, etc.) passed to -loo::loo.array().

    +argument to loo::loo.array().

    + + -

    Value

    +
    moment_match
    +

    (boolean) Whether to use a moment-matching correction for +problematic observations.

    -

    The object returned by loo::loo.array().

    -

    See also

    -

    The loo package website with -documentation and -vignettes.

    +
    ...
    +

    Other arguments (e.g., cores, save_psis, etc.) passed to +loo::loo.array() or loo::loo_moment_match.default() +(if moment_match = TRUE is set).

    -

    Examples

    -
    -# \dontrun{ -# the "logistic" example model has "log_lik" in generated quantities -fit <- cmdstanr_example("logistic") -loo_result <- fit$loo(cores = 2) -print(loo_result) -
    #> -#> Computed from 4000 by 100 log-likelihood matrix -#> -#> Estimate SE -#> elpd_loo -63.6 4.1 -#> p_loo 3.9 0.5 -#> looic 127.2 8.3 -#> ------ -#> Monte Carlo SE of elpd_loo is 0.0. -#> -#> All Pareto k estimates are good (k < 0.5). -#> See help('pareto-k-diagnostic') for details.
    # } +
    +
    +

    Value

    + -
    +

    The object returned by loo::loo.array().

    +
    +
    +

    See also

    +

    The loo package website with +documentation and +vignettes.

    +
    + +
    +

    Examples

    +
    
    +# \dontrun{
    +# the "logistic" example model has "log_lik" in generated quantities
    +fit <- cmdstanr_example("logistic")
    +loo_result <- fit$loo(cores = 2)
    +print(loo_result)
    +#> 
    +#> Computed from 4000 by 100 log-likelihood matrix
    +#> 
    +#>          Estimate  SE
    +#> elpd_loo    -63.6 4.1
    +#> p_loo         3.9 0.5
    +#> looic       127.2 8.3
    +#> ------
    +#> Monte Carlo SE of elpd_loo is 0.0.
    +#> 
    +#> All Pareto k estimates are good (k < 0.5).
    +#> See help('pareto-k-diagnostic') for details.
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-lp-1.png b/docs/reference/fit-method-lp-1.png index 6f1042311..ca30aa561 100644 Binary files a/docs/reference/fit-method-lp-1.png and b/docs/reference/fit-method-lp-1.png differ diff --git a/docs/reference/fit-method-lp.html b/docs/reference/fit-method-lp.html index 7dbbc1e72..8aaf6c788 100644 --- a/docs/reference/fit-method-lp.html +++ b/docs/reference/fit-method-lp.html @@ -1,81 +1,18 @@ - - - - - - - -Extract log probability (target) — fit-method-lp • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract log probability (target) — fit-method-lp • cmdstanr - - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    @@ -186,26 +114,30 @@

    Extract log probability (target)

    (target) accumulated in the model block of the Stan program. For variational inference the log density of the variational approximation to the posterior is also available via the $lp_approx() method.

    -

    See the Log Probability Increment vs. Sampling Statement +

    See the Log Probability Increment vs. Sampling Statement section of the Stan Reference Manual for details on when normalizing constants are dropped from log probability calculations.

    -
    lp()
    -
    -lp_approx()
    - +
    +
    lp()
    +
    +lp_approx()
    +
    -

    Value

    +
    +

    Value

    + -

    A numeric vector with length equal to the number of (post-warmup) +

    A numeric vector with length equal to the number of (post-warmup) draws for MCMC and variational inference, and length equal to 1 for optimization.

    -

    Details

    - +
    +
    +

    Details

    -

    lp__ is the unnormalized log density on Stan's unconstrained space. +

    lp__ is the unnormalized log density on Stan's unconstrained space. This will in general be different than the unnormalized model log density evaluated at a posterior draw (which is on the constrained space). lp__ is intended to diagnose sampling efficiency and evaluate approximations.

    @@ -213,53 +145,57 @@

    Details (also on the unconstrained space). It is exposed in the variational method for performing the checks described in Yao et al. (2018) and implemented in the loo package.

    -

    References

    - +
    +
    +

    References

    Yao, Y., Vehtari, A., Simpson, D., and Gelman, A. (2018). Yes, but did it work?: Evaluating variational inference. Proceedings of the 35th International Conference on Machine Learning, PMLR 80:5581–5590.

    -

    See also

    - - - -

    Examples

    -
    # \dontrun{ -fit_mcmc <- cmdstanr_example("logistic") -head(fit_mcmc$lp()) -
    #> [1] -68.9225 -69.2427 -70.5110 -65.5960 -65.2714 -64.1837
    -fit_mle <- cmdstanr_example("logistic", method = "optimize") -fit_mle$lp() -
    #> [1] -63.9218
    -fit_vb <- cmdstanr_example("logistic", method = "variational") -plot(fit_vb$lp(), fit_vb$lp_approx()) -
    # } +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic")
    +head(fit_mcmc$lp())
    +#> [1] -66.8230 -66.2198 -66.0794 -65.9100 -66.1692 -66.3888
    +
    +fit_mle <- cmdstanr_example("logistic", method = "optimize")
    +fit_mle$lp()
    +#> [1] -63.9218
    +
    +fit_vb <- cmdstanr_example("logistic", method = "variational")
    +plot(fit_vb$lp(), fit_vb$lp_approx())
    +
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-metadata.html b/docs/reference/fit-method-metadata.html index ebf454d11..88674bf7b 100644 --- a/docs/reference/fit-method-metadata.html +++ b/docs/reference/fit-method-metadata.html @@ -1,77 +1,14 @@ - - - - - - - -Extract metadata from CmdStan CSV files — fit-method-metadata • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract metadata from CmdStan CSV files — fit-method-metadata • cmdstanr - - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +

    The $metadata() method returns a list of information gathered from the CSV output files, including the CmdStan configuration used when -fitting the model. See Examples and read_cmdstan_csv().

    +fitting the model. See Examples and read_cmdstan_csv().

    -
    metadata()
    - - -

    See also

    - - +
    +
    metadata()
    +
    -

    Examples

    -
    # \dontrun{ -fit_mcmc <- cmdstanr_example("logistic", method = "sample") -str(fit_mcmc$metadata()) -
    #> List of 40 -#> $ stan_version_major : num 2 -#> $ stan_version_minor : num 29 -#> $ stan_version_patch : num 1 -#> $ start_datetime : chr "2022-03-18 18:25:53 UTC" -#> $ method : chr "sample" -#> $ save_warmup : num 0 -#> $ thin : num 1 -#> $ gamma : num 0.05 -#> $ kappa : num 0.75 -#> $ t0 : num 10 -#> $ init_buffer : num 75 -#> $ term_buffer : num 50 -#> $ window : num 25 -#> $ algorithm : chr "hmc" -#> $ engine : chr "nuts" -#> $ metric : chr "diag_e" -#> $ stepsize_jitter : num 0 -#> $ num_chains : num 1 -#> $ id : num [1:4] 1 2 3 4 -#> $ init : num [1:4] 2 2 2 2 -#> $ seed : num 1.35e+09 -#> $ refresh : num 100 -#> $ sig_figs : num -1 -#> $ profile_file : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-profile-202203181225-1-94bdc7.csv" -#> $ stanc_version : chr "stanc3 v2.29.1" -#> $ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... -#> $ variables : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... -#> $ step_size_adaptation: num [1:4] 0.763 0.761 0.699 0.761 -#> $ model_name : chr "logistic_model" -#> $ adapt_engaged : num 1 -#> $ adapt_delta : num 0.8 -#> $ max_treedepth : num 10 -#> $ step_size : num [1:4] 1 1 1 1 -#> $ iter_warmup : num 1000 -#> $ iter_sampling : num 1000 -#> $ threads_per_chain : num 1 -#> $ time :'data.frame': 4 obs. of 4 variables: -#> ..$ chain_id: num [1:4] 1 2 3 4 -#> ..$ warmup : num [1:4] 0.026 0.032 0.058 0.028 -#> ..$ sampling: num [1:4] 0.094 0.104 0.173 0.098 -#> ..$ total : num [1:4] 0.12 0.136 0.231 0.126 -#> $ stan_variable_sizes :List of 4 -#> ..$ lp__ : num 1 -#> ..$ alpha : num 1 -#> ..$ beta : num 3 -#> ..$ log_lik: num 100 -#> $ stan_variables : chr [1:4] "lp__" "alpha" "beta" "log_lik" -#> $ model_params : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    -fit_mle <- cmdstanr_example("logistic", method = "optimize") -str(fit_mle$metadata()) -
    #> List of 30 -#> $ stan_version_major : num 2 -#> $ stan_version_minor : num 29 -#> $ stan_version_patch : num 1 -#> $ start_datetime : chr "2022-03-18 18:25:55 UTC" -#> $ method : chr "optimize" -#> $ algorithm : chr "lbfgs" -#> $ init_alpha : num 0.001 -#> $ tol_obj : num 1e-12 -#> $ tol_rel_obj : num 10000 -#> $ tol_grad : num 1e-08 -#> $ tol_rel_grad : num 1e+07 -#> $ tol_param : num 1e-08 -#> $ history_size : num 5 -#> $ iter : num 2000 -#> $ save_iterations : num 0 -#> $ id : num 1 -#> $ init : num 2 -#> $ seed : num 4.24e+08 -#> $ refresh : num 100 -#> $ sig_figs : num -1 -#> $ profile_file : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-profile-202203181225-1-85709d.csv" -#> $ stanc_version : chr "stanc3 v2.29.1" -#> $ sampler_diagnostics: chr(0) -#> $ variables : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... -#> $ model_name : chr "logistic_model" -#> $ threads : num 1 -#> $ time :'data.frame': 0 obs. of 0 variables -#> $ stan_variable_sizes:List of 4 -#> ..$ lp__ : num 1 -#> ..$ alpha : num 1 -#> ..$ beta : num 3 -#> ..$ log_lik: num 100 -#> $ stan_variables : chr [1:4] "lp__" "alpha" "beta" "log_lik" -#> $ model_params : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    -fit_vb <- cmdstanr_example("logistic", method = "variational") -str(fit_vb$metadata()) -
    #> List of 29 -#> $ stan_version_major : num 2 -#> $ stan_version_minor : num 29 -#> $ stan_version_patch : num 1 -#> $ start_datetime : chr "2022-03-18 18:25:55 UTC" -#> $ method : chr "variational" -#> $ algorithm : chr "meanfield" -#> $ iter : num 50 -#> $ grad_samples : num 1 -#> $ elbo_samples : num 100 -#> $ eta : num 1 -#> $ tol_rel_obj : num 0.01 -#> $ eval_elbo : num 100 -#> $ output_samples : num 1000 -#> $ id : num 1 -#> $ init : num 2 -#> $ seed : num 5.22e+08 -#> $ refresh : num 100 -#> $ sig_figs : num -1 -#> $ profile_file : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-profile-202203181225-1-18f1e4.csv" -#> $ stanc_version : chr "stanc3 v2.29.1" -#> $ sampler_diagnostics: chr(0) -#> $ variables : chr [1:106] "lp__" "lp_approx__" "alpha" "beta[1]" ... -#> $ model_name : chr "logistic_model" -#> $ adapt_engaged : num 1 -#> $ threads : num 1 -#> $ time :'data.frame': 0 obs. of 0 variables -#> $ stan_variable_sizes:List of 5 -#> ..$ lp__ : num 1 -#> ..$ lp_approx__: num 1 -#> ..$ alpha : num 1 -#> ..$ beta : num 3 -#> ..$ log_lik : num 100 -#> $ stan_variables : chr [1:5] "lp__" "lp_approx__" "alpha" "beta" ... -#> $ model_params : chr [1:106] "lp__" "lp_approx__" "alpha" "beta[1]" ...
    # } + -
    +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +str(fit_mcmc$metadata())
    +#> List of 40
    +#>  $ stan_version_major  : num 2
    +#>  $ stan_version_minor  : num 32
    +#>  $ stan_version_patch  : num 2
    +#>  $ start_datetime      : chr "2023-07-25 20:36:00 UTC"
    +#>  $ method              : chr "sample"
    +#>  $ save_warmup         : num 0
    +#>  $ thin                : num 1
    +#>  $ gamma               : num 0.05
    +#>  $ kappa               : num 0.75
    +#>  $ t0                  : num 10
    +#>  $ init_buffer         : num 75
    +#>  $ term_buffer         : num 50
    +#>  $ window              : num 25
    +#>  $ algorithm           : chr "hmc"
    +#>  $ engine              : chr "nuts"
    +#>  $ metric              : chr "diag_e"
    +#>  $ stepsize_jitter     : num 0
    +#>  $ num_chains          : num 1
    +#>  $ id                  : num [1:4] 1 2 3 4
    +#>  $ init                : num [1:4] 2 2 2 2
    +#>  $ seed                : num 49669263
    +#>  $ refresh             : num 100
    +#>  $ sig_figs            : num -1
    +#>  $ profile_file        : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-profile-202307251436-1-352ddc.csv"
    +#>  $ stanc_version       : chr "stanc3 v2.32.2"
    +#>  $ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ...
    +#>  $ variables           : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +#>  $ step_size_adaptation: num [1:4] 0.702 0.797 0.751 0.804
    +#>  $ model_name          : chr "logistic_model"
    +#>  $ adapt_engaged       : num 1
    +#>  $ adapt_delta         : num 0.8
    +#>  $ max_treedepth       : num 10
    +#>  $ step_size           : num [1:4] 1 1 1 1
    +#>  $ iter_warmup         : num 1000
    +#>  $ iter_sampling       : num 1000
    +#>  $ threads_per_chain   : num 1
    +#>  $ time                :'data.frame':	4 obs. of  4 variables:
    +#>   ..$ chain_id: num [1:4] 1 2 3 4
    +#>   ..$ warmup  : num [1:4] 0.022 0.022 0.021 0.022
    +#>   ..$ sampling: num [1:4] 0.068 0.065 0.073 0.067
    +#>   ..$ total   : num [1:4] 0.09 0.087 0.094 0.089
    +#>  $ stan_variable_sizes :List of 4
    +#>   ..$ lp__   : num 1
    +#>   ..$ alpha  : num 1
    +#>   ..$ beta   : num 3
    +#>   ..$ log_lik: num 100
    +#>  $ stan_variables      : chr [1:4] "lp__" "alpha" "beta" "log_lik"
    +#>  $ model_params        : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +
    +fit_mle <- cmdstanr_example("logistic", method = "optimize")
    +str(fit_mle$metadata())
    +#> List of 31
    +#>  $ stan_version_major : num 2
    +#>  $ stan_version_minor : num 32
    +#>  $ stan_version_patch : num 2
    +#>  $ start_datetime     : chr "2023-07-25 20:36:01 UTC"
    +#>  $ method             : chr "optimize"
    +#>  $ algorithm          : chr "lbfgs"
    +#>  $ init_alpha         : num 0.001
    +#>  $ tol_obj            : num 1e-12
    +#>  $ tol_rel_obj        : num 10000
    +#>  $ tol_grad           : num 1e-08
    +#>  $ tol_rel_grad       : num 1e+07
    +#>  $ tol_param          : num 1e-08
    +#>  $ history_size       : num 5
    +#>  $ jacobian           : num 0
    +#>  $ iter               : num 2000
    +#>  $ save_iterations    : num 0
    +#>  $ id                 : num 1
    +#>  $ init               : num 2
    +#>  $ seed               : num 1.74e+09
    +#>  $ refresh            : num 100
    +#>  $ sig_figs           : num -1
    +#>  $ profile_file       : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-profile-202307251436-1-4f11df.csv"
    +#>  $ stanc_version      : chr "stanc3 v2.32.2"
    +#>  $ sampler_diagnostics: chr(0) 
    +#>  $ variables          : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +#>  $ model_name         : chr "logistic_model"
    +#>  $ threads            : num 1
    +#>  $ time               :'data.frame':	0 obs. of  0 variables
    +#>  $ stan_variable_sizes:List of 4
    +#>   ..$ lp__   : num 1
    +#>   ..$ alpha  : num 1
    +#>   ..$ beta   : num 3
    +#>   ..$ log_lik: num 100
    +#>  $ stan_variables     : chr [1:4] "lp__" "alpha" "beta" "log_lik"
    +#>  $ model_params       : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +
    +fit_vb <- cmdstanr_example("logistic", method = "variational")
    +str(fit_vb$metadata())
    +#> List of 29
    +#>  $ stan_version_major : num 2
    +#>  $ stan_version_minor : num 32
    +#>  $ stan_version_patch : num 2
    +#>  $ start_datetime     : chr "2023-07-25 20:36:01 UTC"
    +#>  $ method             : chr "variational"
    +#>  $ algorithm          : chr "meanfield"
    +#>  $ iter               : num 50
    +#>  $ grad_samples       : num 1
    +#>  $ elbo_samples       : num 100
    +#>  $ eta                : num 1
    +#>  $ tol_rel_obj        : num 0.01
    +#>  $ eval_elbo          : num 100
    +#>  $ output_samples     : num 1000
    +#>  $ id                 : num 1
    +#>  $ init               : num 2
    +#>  $ seed               : num 2.97e+08
    +#>  $ refresh            : num 100
    +#>  $ sig_figs           : num -1
    +#>  $ profile_file       : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-profile-202307251436-1-5bbf54.csv"
    +#>  $ stanc_version      : chr "stanc3 v2.32.2"
    +#>  $ sampler_diagnostics: chr(0) 
    +#>  $ variables          : chr [1:106] "lp__" "lp_approx__" "alpha" "beta[1]" ...
    +#>  $ model_name         : chr "logistic_model"
    +#>  $ adapt_engaged      : num 1
    +#>  $ threads            : num 1
    +#>  $ time               :'data.frame':	0 obs. of  0 variables
    +#>  $ stan_variable_sizes:List of 5
    +#>   ..$ lp__       : num 1
    +#>   ..$ lp_approx__: num 1
    +#>   ..$ alpha      : num 1
    +#>   ..$ beta       : num 3
    +#>   ..$ log_lik    : num 100
    +#>  $ stan_variables     : chr [1:5] "lp__" "lp_approx__" "alpha" "beta" ...
    +#>  $ model_params       : chr [1:106] "lp__" "lp_approx__" "alpha" "beta[1]" ...
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-mle.html b/docs/reference/fit-method-mle.html index f191e2ad6..a6544a893 100644 --- a/docs/reference/fit-method-mle.html +++ b/docs/reference/fit-method-mle.html @@ -1,81 +1,18 @@ - - - - - - - -Extract (penalized) maximum likelihood estimate after optimization — fit-method-mle • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract (penalized) maximum likelihood estimate after optimization — fit-method-mle • cmdstanr - - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    -

    The $mle() method is only available for CmdStanMLE objects. +

    The $mle() method is only available for CmdStanMLE objects. It returns the penalized maximum likelihood estimate (posterior mode) as a numeric vector with one element per variable. The returned vector does not include lp__, the total log probability (target) accumulated in the model block of the Stan program, which is available via the -$lp() method and also included in the -$draws() method.

    +$lp() method and also included in the +$draws() method.

    -
    mle(variables = NULL)
    +
    +
    mle(variables = NULL)
    +
    -

    Arguments

    - - - - - - -
    variables

    (character vector) The variables (parameters, transformed +

    +

    Arguments

    +
    variables
    +

    (character vector) The variables (parameters, transformed parameters, and generated quantities) to include. If NULL (the default) -then all variables are included.

    - -

    Value

    +then all variables are included.

    -

    A numeric vector. See Examples.

    -

    See also

    +
    +
    +

    Value

    + - - -

    Examples

    -
    # \dontrun{ -fit <- cmdstanr_example("logistic", method = "optimize") -fit$mle("alpha") -
    #> alpha -#> 0.364475
    fit$mle("beta") -
    #> beta[1] beta[2] beta[3] -#> -0.631555 -0.258975 0.648491
    fit$mle("beta[2]") -
    #> beta[2] -#> -0.258975
    # } +

    A numeric vector. See Examples.

    +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +fit <- cmdstanr_example("logistic", method = "optimize")
    +fit$mle("alpha")
    +#>    alpha 
    +#> 0.364457 
    +fit$mle("beta")
    +#>   beta[1]   beta[2]   beta[3] 
    +#> -0.631547 -0.258967  0.648495 
    +fit$mle("beta[2]")
    +#>   beta[2] 
    +#> -0.258967 
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-num_chains.html b/docs/reference/fit-method-num_chains.html index 2f9483247..9c1066025 100644 --- a/docs/reference/fit-method-num_chains.html +++ b/docs/reference/fit-method-num_chains.html @@ -1,75 +1,12 @@ - - - - - - - -Extract number of chains after MCMC — fit-method-num_chains • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract number of chains after MCMC — fit-method-num_chains • cmdstanr - - + + - - -
    -
    - -
    - -
    +
    @@ -179,48 +107,52 @@

    Extract number of chains after MCMC

    The $num_chains() method returns the number of MCMC chains.

    -
    num_chains()
    - - -

    Value

    - -

    An integer.

    -

    See also

    +
    +
    num_chains()
    +
    - +
    +

    Value

    + -

    Examples

    -
    # \dontrun{ -fit_mcmc <- cmdstanr_example(chains = 2) -fit_mcmc$num_chains() -
    #> [1] 2
    # } +

    An integer.

    +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example(chains = 2)
    +fit_mcmc$num_chains()
    +#> [1] 2
    +# }
    +
    +
    +
    +
    -
    - +

    - - + + diff --git a/docs/reference/fit-method-output.html b/docs/reference/fit-method-output.html index 395a5186d..f83b9096e 100644 --- a/docs/reference/fit-method-output.html +++ b/docs/reference/fit-method-output.html @@ -1,80 +1,17 @@ - - - - - - - -Access console output — fit-method-output • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Access console output — fit-method-output • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    @@ -189,226 +117,233 @@

    Access console output

    the console output.

    -
    output(id = NULL)
    - -

    Arguments

    - - - - - - -
    id

    (integer) The chain id. Ignored if the model was not fit using -MCMC.

    - -

    See also

    +
    +
    output(id = NULL)
    +
    - +
    +

    Arguments

    +
    id
    +

    (integer) The chain id. Ignored if the model was not fit using +MCMC.

    -

    Examples

    -
    # \dontrun{ -fit_mcmc <- cmdstanr_example("logistic", method = "sample") -fit_mcmc$output(1) -
    #> -#> method = sample (Default) -#> sample -#> num_samples = 1000 (Default) -#> num_warmup = 1000 (Default) -#> save_warmup = 0 (Default) -#> thin = 1 (Default) -#> adapt -#> engaged = 1 (Default) -#> gamma = 0.050000000000000003 (Default) -#> delta = 0.80000000000000004 (Default) -#> kappa = 0.75 (Default) -#> t0 = 10 (Default) -#> init_buffer = 75 (Default) -#> term_buffer = 50 (Default) -#> window = 25 (Default) -#> algorithm = hmc (Default) -#> hmc -#> engine = nuts (Default) -#> nuts -#> max_depth = 10 (Default) -#> metric = diag_e (Default) -#> metric_file = (Default) -#> stepsize = 1 (Default) -#> stepsize_jitter = 0 (Default) -#> num_chains = 1 (Default) -#> id = 1 (Default) -#> data -#> file = /private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpBlSxPc/temp_libpathbc8d32dd446a/cmdstanr/logistic.data.json -#> init = 2 (Default) -#> random -#> seed = 1354164569 -#> output -#> file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181225-1-827a18.csv -#> diagnostic_file = (Default) -#> refresh = 100 (Default) -#> sig_figs = -1 (Default) -#> profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-profile-202203181225-1-987172.csv -#> num_threads = 1 (Default) -#> -#> -#> Gradient evaluation took 2.4e-05 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds. -#> Adjust your expectations accordingly! 
-#> -#> -#> Iteration: 1 / 2000 [ 0%] (Warmup) -#> Iteration: 100 / 2000 [ 5%] (Warmup) -#> Iteration: 200 / 2000 [ 10%] (Warmup) -#> Iteration: 300 / 2000 [ 15%] (Warmup) -#> Iteration: 400 / 2000 [ 20%] (Warmup) -#> Iteration: 500 / 2000 [ 25%] (Warmup) -#> Iteration: 600 / 2000 [ 30%] (Warmup) -#> Iteration: 700 / 2000 [ 35%] (Warmup) -#> Iteration: 800 / 2000 [ 40%] (Warmup) -#> Iteration: 900 / 2000 [ 45%] (Warmup) -#> Iteration: 1000 / 2000 [ 50%] (Warmup) -#> Iteration: 1001 / 2000 [ 50%] (Sampling) -#> Iteration: 1100 / 2000 [ 55%] (Sampling) -#> Iteration: 1200 / 2000 [ 60%] (Sampling) -#> Iteration: 1300 / 2000 [ 65%] (Sampling) -#> Iteration: 1400 / 2000 [ 70%] (Sampling) -#> Iteration: 1500 / 2000 [ 75%] (Sampling) -#> Iteration: 1600 / 2000 [ 80%] (Sampling) -#> Iteration: 1700 / 2000 [ 85%] (Sampling) -#> Iteration: 1800 / 2000 [ 90%] (Sampling) -#> Iteration: 1900 / 2000 [ 95%] (Sampling) -#> Iteration: 2000 / 2000 [100%] (Sampling) -#> -#> Elapsed Time: 0.026 seconds (Warm-up) -#> 0.088 seconds (Sampling) -#> 0.114 seconds (Total)
    out <- fit_mcmc$output() -str(out) -
    #> List of 4 -#> $ : chr [1:73] "" "method = sample (Default)" " sample" " num_samples = 1000 (Default)" ... -#> $ : chr [1:73] "" "method = sample (Default)" " sample" " num_samples = 1000 (Default)" ... -#> $ : chr [1:73] "" "method = sample (Default)" " sample" " num_samples = 1000 (Default)" ... -#> $ : chr [1:73] "" "method = sample (Default)" " sample" " num_samples = 1000 (Default)" ...
    -fit_mle <- cmdstanr_example("logistic", method = "optimize") -fit_mle$output() -
    #> -#> method = optimize -#> optimize -#> algorithm = lbfgs (Default) -#> lbfgs -#> init_alpha = 0.001 (Default) -#> tol_obj = 9.9999999999999998e-13 (Default) -#> tol_rel_obj = 10000 (Default) -#> tol_grad = 1e-08 (Default) -#> tol_rel_grad = 10000000 (Default) -#> tol_param = 1e-08 (Default) -#> history_size = 5 (Default) -#> iter = 2000 (Default) -#> save_iterations = 0 (Default) -#> id = 1 (Default) -#> data -#> file = /private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpBlSxPc/temp_libpathbc8d32dd446a/cmdstanr/logistic.data.json -#> init = 2 (Default) -#> random -#> seed = 780877759 -#> output -#> file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181225-1-1e7746.csv -#> diagnostic_file = (Default) -#> refresh = 100 (Default) -#> sig_figs = -1 (Default) -#> profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-profile-202203181225-1-71b1d7.csv -#> num_threads = 1 (Default) -#> -#> Initial log joint probability = -137.575 -#> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 7 -63.9218 9.27364e-05 0.000115205 0.9521 0.9521 11 -#> Optimization terminated normally: -#> Convergence detected: relative gradient magnitude is below tolerance
    -fit_vb <- cmdstanr_example("logistic", method = "variational") -fit_vb$output() -
    #> -#> method = variational -#> variational -#> algorithm = meanfield (Default) -#> meanfield -#> iter = 10000 (Default) -#> grad_samples = 1 (Default) -#> elbo_samples = 100 (Default) -#> eta = 1 (Default) -#> adapt -#> engaged = 1 (Default) -#> iter = 50 (Default) -#> tol_rel_obj = 0.01 (Default) -#> eval_elbo = 100 (Default) -#> output_samples = 1000 (Default) -#> id = 1 (Default) -#> data -#> file = /private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpBlSxPc/temp_libpathbc8d32dd446a/cmdstanr/logistic.data.json -#> init = 2 (Default) -#> random -#> seed = 655253804 -#> output -#> file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181225-1-7c6411.csv -#> diagnostic_file = (Default) -#> refresh = 100 (Default) -#> sig_figs = -1 (Default) -#> profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-profile-202203181225-1-8abe62.csv -#> num_threads = 1 (Default) -#> -#> ------------------------------------------------------------ -#> EXPERIMENTAL ALGORITHM: -#> This procedure has not been thoroughly tested and may be unstable -#> or buggy. The interface is subject to change. -#> ------------------------------------------------------------ -#> -#> -#> -#> Gradient evaluation took 2.6e-05 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.26 seconds. -#> Adjust your expectations accordingly! -#> -#> -#> Begin eta adaptation. -#> Iteration: 1 / 250 [ 0%] (Adaptation) -#> Iteration: 50 / 250 [ 20%] (Adaptation) -#> Iteration: 100 / 250 [ 40%] (Adaptation) -#> Iteration: 150 / 250 [ 60%] (Adaptation) -#> Iteration: 200 / 250 [ 80%] (Adaptation) -#> Success! Found best value [eta = 1] earlier than expected. -#> -#> Begin stochastic gradient ascent. -#> iter ELBO delta_ELBO_mean delta_ELBO_med notes -#> 100 -66.196 1.000 1.000 -#> 200 -66.407 0.502 1.000 -#> 300 -66.643 0.336 0.004 MEDIAN ELBO CONVERGED -#> -#> Drawing a sample of size 1000 from the approximate posterior... 
-#> COMPLETED.
    # } +
    + -
    +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$output(1)
    +#> 
    +#> method = sample (Default)
    +#>   sample
    +#>     num_samples = 1000 (Default)
    +#>     num_warmup = 1000 (Default)
    +#>     save_warmup = 0 (Default)
    +#>     thin = 1 (Default)
    +#>     adapt
    +#>       engaged = 1 (Default)
    +#>       gamma = 0.050000000000000003 (Default)
    +#>       delta = 0.80000000000000004 (Default)
    +#>       kappa = 0.75 (Default)
    +#>       t0 = 10 (Default)
    +#>       init_buffer = 75 (Default)
    +#>       term_buffer = 50 (Default)
    +#>       window = 25 (Default)
    +#>     algorithm = hmc (Default)
    +#>       hmc
    +#>         engine = nuts (Default)
    +#>           nuts
    +#>             max_depth = 10 (Default)
    +#>         metric = diag_e (Default)
    +#>         metric_file =  (Default)
    +#>         stepsize = 1 (Default)
    +#>         stepsize_jitter = 0 (Default)
    +#>     num_chains = 1 (Default)
    +#> id = 1 (Default)
    +#> data
    +#>   file = /private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpKFjP9Y/temp_libpath15d4242e55c52/cmdstanr/logistic.data.json
    +#> init = 2 (Default)
    +#> random
    +#>   seed = 810297768
    +#> output
    +#>   file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251436-1-71373e.csv
    +#>   diagnostic_file =  (Default)
    +#>   refresh = 100 (Default)
    +#>   sig_figs = -1 (Default)
    +#>   profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-profile-202307251436-1-09f0a3.csv
    +#> num_threads = 1 (Default)
    +#> 
    +#> 
    +#> Gradient evaluation took 2e-05 seconds
    +#> 1000 transitions using 10 leapfrog steps per transition would take 0.2 seconds.
    +#> Adjust your expectations accordingly!
    +#> 
    +#> 
    +#> Iteration:    1 / 2000 [  0%]  (Warmup)
    +#> Iteration:  100 / 2000 [  5%]  (Warmup)
    +#> Iteration:  200 / 2000 [ 10%]  (Warmup)
    +#> Iteration:  300 / 2000 [ 15%]  (Warmup)
    +#> Iteration:  400 / 2000 [ 20%]  (Warmup)
    +#> Iteration:  500 / 2000 [ 25%]  (Warmup)
    +#> Iteration:  600 / 2000 [ 30%]  (Warmup)
    +#> Iteration:  700 / 2000 [ 35%]  (Warmup)
    +#> Iteration:  800 / 2000 [ 40%]  (Warmup)
    +#> Iteration:  900 / 2000 [ 45%]  (Warmup)
    +#> Iteration: 1000 / 2000 [ 50%]  (Warmup)
    +#> Iteration: 1001 / 2000 [ 50%]  (Sampling)
    +#> Iteration: 1100 / 2000 [ 55%]  (Sampling)
    +#> Iteration: 1200 / 2000 [ 60%]  (Sampling)
    +#> Iteration: 1300 / 2000 [ 65%]  (Sampling)
    +#> Iteration: 1400 / 2000 [ 70%]  (Sampling)
    +#> Iteration: 1500 / 2000 [ 75%]  (Sampling)
    +#> Iteration: 1600 / 2000 [ 80%]  (Sampling)
    +#> Iteration: 1700 / 2000 [ 85%]  (Sampling)
    +#> Iteration: 1800 / 2000 [ 90%]  (Sampling)
    +#> Iteration: 1900 / 2000 [ 95%]  (Sampling)
    +#> Iteration: 2000 / 2000 [100%]  (Sampling)
    +#> 
    +#>  Elapsed Time: 0.022 seconds (Warm-up)
    +#>                0.067 seconds (Sampling)
    +#>                0.089 seconds (Total)
    +out <- fit_mcmc$output()
    +str(out)
    +#> List of 4
    +#>  $ : chr [1:73] "" "method = sample (Default)" "  sample" "    num_samples = 1000 (Default)" ...
    +#>  $ : chr [1:73] "" "method = sample (Default)" "  sample" "    num_samples = 1000 (Default)" ...
    +#>  $ : chr [1:73] "" "method = sample (Default)" "  sample" "    num_samples = 1000 (Default)" ...
    +#>  $ : chr [1:73] "" "method = sample (Default)" "  sample" "    num_samples = 1000 (Default)" ...
    +
    +fit_mle <- cmdstanr_example("logistic", method = "optimize")
    +fit_mle$output()
    +#> 
    +#> method = optimize
    +#>   optimize
    +#>     algorithm = lbfgs (Default)
    +#>       lbfgs
    +#>         init_alpha = 0.001 (Default)
    +#>         tol_obj = 9.9999999999999998e-13 (Default)
    +#>         tol_rel_obj = 10000 (Default)
    +#>         tol_grad = 1e-08 (Default)
    +#>         tol_rel_grad = 10000000 (Default)
    +#>         tol_param = 1e-08 (Default)
    +#>         history_size = 5 (Default)
    +#>     jacobian = 0 (Default)
    +#>     iter = 2000 (Default)
    +#>     save_iterations = 0 (Default)
    +#> id = 1 (Default)
    +#> data
    +#>   file = /private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpKFjP9Y/temp_libpath15d4242e55c52/cmdstanr/logistic.data.json
    +#> init = 2 (Default)
    +#> random
    +#>   seed = 311070905
    +#> output
    +#>   file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251436-1-656287.csv
    +#>   diagnostic_file =  (Default)
    +#>   refresh = 100 (Default)
    +#>   sig_figs = -1 (Default)
    +#>   profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-profile-202307251436-1-75d1fd.csv
    +#> num_threads = 1 (Default)
    +#> 
    +#> Initial log joint probability = -109.352
    +#>     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
    +#>        7      -63.9218   0.000676152    0.00167181       0.941       0.941        9   
    +#> Optimization terminated normally: 
    +#>   Convergence detected: relative gradient magnitude is below tolerance
    +
    +fit_vb <- cmdstanr_example("logistic", method = "variational")
    +fit_vb$output()
    +#> 
    +#> method = variational
    +#>   variational
    +#>     algorithm = meanfield (Default)
    +#>       meanfield
    +#>     iter = 10000 (Default)
    +#>     grad_samples = 1 (Default)
    +#>     elbo_samples = 100 (Default)
    +#>     eta = 1 (Default)
    +#>     adapt
    +#>       engaged = 1 (Default)
    +#>       iter = 50 (Default)
    +#>     tol_rel_obj = 0.01 (Default)
    +#>     eval_elbo = 100 (Default)
    +#>     output_samples = 1000 (Default)
    +#> id = 1 (Default)
    +#> data
    +#>   file = /private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpKFjP9Y/temp_libpath15d4242e55c52/cmdstanr/logistic.data.json
    +#> init = 2 (Default)
    +#> random
    +#>   seed = 296025410
    +#> output
    +#>   file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251436-1-7927e8.csv
    +#>   diagnostic_file =  (Default)
    +#>   refresh = 100 (Default)
    +#>   sig_figs = -1 (Default)
    +#>   profile_file = /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-profile-202307251436-1-2bb650.csv
    +#> num_threads = 1 (Default)
    +#> 
    +#> ------------------------------------------------------------
    +#> EXPERIMENTAL ALGORITHM:
    +#>   This procedure has not been thoroughly tested and may be unstable
    +#>   or buggy. The interface is subject to change.
    +#> ------------------------------------------------------------
    +#> 
    +#> 
    +#> 
    +#> Gradient evaluation took 2.1e-05 seconds
    +#> 1000 transitions using 10 leapfrog steps per transition would take 0.21 seconds.
    +#> Adjust your expectations accordingly!
    +#> 
    +#> 
    +#> Begin eta adaptation.
    +#> Iteration:   1 / 250 [  0%]  (Adaptation)
    +#> Iteration:  50 / 250 [ 20%]  (Adaptation)
    +#> Iteration: 100 / 250 [ 40%]  (Adaptation)
    +#> Iteration: 150 / 250 [ 60%]  (Adaptation)
    +#> Iteration: 200 / 250 [ 80%]  (Adaptation)
    +#> Success! Found best value [eta = 1] earlier than expected.
    +#> 
    +#> Begin stochastic gradient ascent.
    +#>   iter             ELBO   delta_ELBO_mean   delta_ELBO_med   notes 
    +#>    100          -66.992             1.000            1.000
    +#>    200          -66.023             0.507            1.000
    +#>    300          -66.735             0.342            0.015
    +#>    400          -66.204             0.258            0.015
    +#>    500          -66.470             0.207            0.011
    +#>    600          -66.254             0.173            0.011
    +#>    700          -66.213             0.149            0.008   MEDIAN ELBO CONVERGED
    +#> 
    +#> Drawing a sample of size 1000 from the approximate posterior... 
    +#> COMPLETED.
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-profiles.html b/docs/reference/fit-method-profiles.html index 2a17e670a..48a8cbe07 100644 --- a/docs/reference/fit-method-profiles.html +++ b/docs/reference/fit-method-profiles.html @@ -1,79 +1,16 @@ - - - - - - - -Return profiling data — fit-method-profiles • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Return profiling data — fit-method-profiles • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +

    The $profiles() method returns a list of data frames with profiling data if any profiling data was written to the profile CSV files. -See save_profile_files() to control where the files are saved.

    +See save_profile_files() to control where the files are saved.

    Support for profiling Stan programs is available with CmdStan >= 2.26 and requires adding profiling statements to the Stan program.

    -
    profiles()
    - +
    +
    profiles()
    +
    -

    Value

    +
    +

    Value

    + -

    A list of data frames with profiling data if the profiling CSV files +

    A list of data frames with profiling data if the profiling CSV files were created.

    -

    See also

    - - - -

    Examples

    -
    -# \dontrun{ -# first fit a model using MCMC -mcmc_program <- write_stan_file( - 'data { - int<lower=0> N; - int<lower=0,upper=1> y[N]; - } - parameters { - real<lower=0,upper=1> theta; - } - model { - profile("likelihood") { - y ~ bernoulli(theta); - } - } - generated quantities { - int y_rep[N]; - profile("gq") { - y_rep = bernoulli_rng(rep_vector(theta, N)); - } - } -' -) -mod_mcmc <- cmdstan_model(mcmc_program) -
    #> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/model-d65431ed4576.stan', line 3, column 4: Declaration -#> of arrays by placing brackets after a variable name is deprecated and -#> will be removed in Stan 2.32.0. Instead use the array keyword before the -#> type. This can be changed automatically using the auto-format flag to -#> stanc -#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/model-d65431ed4576.stan', line 14, column 4: Declaration -#> of arrays by placing brackets after a variable name is deprecated and -#> will be removed in Stan 2.32.0. Instead use the array keyword before the -#> type. This can be changed automatically using the auto-format flag to -#> stanc
    -data <- list(N = 10, y = c(1,1,0,0,0,1,0,1,0,0)) -fit <- mod_mcmc$sample(data = data, seed = 123, refresh = 0) -
    #> Running MCMC with 4 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> Chain 3 finished in 0.0 seconds. -#> Chain 4 finished in 0.0 seconds. -#> -#> All 4 chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.5 seconds. -#>
    -fit$profiles() -
    #> [[1]] -#> name thread_id total_time forward_time reverse_time chain_stack -#> 1 gq 0x10f232e00 0.000386068 0.000386068 0.000000000 0 -#> 2 likelihood 0x10f232e00 0.001285670 0.000903227 0.000382445 7169 -#> no_chain_stack autodiff_calls no_autodiff_calls -#> 1 0 0 1000 -#> 2 0 7169 1 -#> -#> [[2]] -#> name thread_id total_time forward_time reverse_time chain_stack -#> 1 gq 0x101daee00 0.000393415 0.000393415 0.000000000 0 -#> 2 likelihood 0x101daee00 0.001305930 0.000932404 0.000373525 7155 -#> no_chain_stack autodiff_calls no_autodiff_calls -#> 1 0 0 1000 -#> 2 0 7155 1 -#> -#> [[3]] -#> name thread_id total_time forward_time reverse_time chain_stack -#> 1 gq 0x1121b1e00 0.000534289 0.000534289 0.000000000 0 -#> 2 likelihood 0x1121b1e00 0.001384640 0.000993686 0.000390953 6879 -#> no_chain_stack autodiff_calls no_autodiff_calls -#> 1 0 0 1000 -#> 2 0 6879 1 -#> -#> [[4]] -#> name thread_id total_time forward_time reverse_time chain_stack -#> 1 gq 0x113ecce00 0.000356809 0.000356809 0.000000000 0 -#> 2 likelihood 0x113ecce00 0.001137030 0.000803924 0.000333102 6892 -#> no_chain_stack autodiff_calls no_autodiff_calls -#> 1 0 0 1000 -#> 2 0 6892 1 -#>
    # } +
    + -
    +
    +

    Examples

    +
    
    +# \dontrun{
    +# first fit a model using MCMC
    +mcmc_program <- write_stan_file(
    +  'data {
    +    int<lower=0> N;
    +    int<lower=0,upper=1> y[N];
    +  }
    +  parameters {
    +    real<lower=0,upper=1> theta;
    +  }
    +  model {
    +    profile("likelihood") {
    +      y ~ bernoulli(theta);
    +    }
    +  }
    +  generated quantities {
    +    int y_rep[N];
    +    profile("gq") {
    +      y_rep = bernoulli_rng(rep_vector(theta, N));
    +    }
    +  }
    +'
    +)
    +mod_mcmc <- cmdstan_model(mcmc_program)
    +#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/model-16218658ea62a.stan', line 3, column 4: Declaration
    +#>     of arrays by placing brackets after a variable name is deprecated and
    +#>     will be removed in Stan 2.33.0. Instead use the array keyword before the
    +#>     type. This can be changed automatically using the auto-format flag to
    +#>     stanc
    +#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/model-16218658ea62a.stan', line 14, column 4: Declaration
    +#>     of arrays by placing brackets after a variable name is deprecated and
    +#>     will be removed in Stan 2.33.0. Instead use the array keyword before the
    +#>     type. This can be changed automatically using the auto-format flag to
    +#>     stanc
    +
    +data <- list(N = 10, y = c(1,1,0,0,0,1,0,1,0,0))
    +fit <- mod_mcmc$sample(data = data, seed = 123, refresh = 0)
    +#> Running MCMC with 4 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> Chain 3 finished in 0.0 seconds.
    +#> Chain 4 finished in 0.0 seconds.
    +#> 
    +#> All 4 chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.5 seconds.
    +#> 
    +
    +fit$profiles()
    +#> [[1]]
    +#>         name   thread_id total_time forward_time reverse_time chain_stack
    +#> 1         gq 0x10afd5e00 0.00037530  0.000375300  0.000000000           0
    +#> 2 likelihood 0x10afd5e00 0.00114227  0.000802925  0.000339345        7169
    +#>   no_chain_stack autodiff_calls no_autodiff_calls
    +#> 1              0              0              1000
    +#> 2              0           7169                 1
    +#> 
    +#> [[2]]
    +#>         name   thread_id total_time forward_time reverse_time chain_stack
    +#> 1         gq 0x11a045e00 0.00043389  0.000433890  0.000000000           0
    +#> 2 likelihood 0x11a045e00 0.00111438  0.000786092  0.000328286        7155
    +#>   no_chain_stack autodiff_calls no_autodiff_calls
    +#> 1              0              0              1000
    +#> 2              0           7155                 1
    +#> 
    +#> [[3]]
    +#>         name   thread_id  total_time forward_time reverse_time chain_stack
    +#> 1         gq 0x10c67be00 0.000330582  0.000330582  0.000000000           0
    +#> 2 likelihood 0x10c67be00 0.001059290  0.000746087  0.000313204        6879
    +#>   no_chain_stack autodiff_calls no_autodiff_calls
    +#> 1              0              0              1000
    +#> 2              0           6879                 1
    +#> 
    +#> [[4]]
    +#>         name   thread_id  total_time forward_time reverse_time chain_stack
    +#> 1         gq 0x108f13e00 0.000365107  0.000365107  0.000000000           0
    +#> 2 likelihood 0x108f13e00 0.001159310  0.000815756  0.000343553        6892
    +#>   no_chain_stack autodiff_calls no_autodiff_calls
    +#> 1              0              0              1000
    +#> 2              0           6892                 1
    +#> 
    +# }
    +
    +
    +
    +
    -
    - +
    - - + + diff --git a/docs/reference/fit-method-return_codes.html b/docs/reference/fit-method-return_codes.html index 6a82b3484..9ccd42588 100644 --- a/docs/reference/fit-method-return_codes.html +++ b/docs/reference/fit-method-return_codes.html @@ -1,76 +1,13 @@ - - - - - - - -Extract return codes from CmdStan — fit-method-return_codes • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract return codes from CmdStan — fit-method-return_codes • cmdstanr - - - - + + -
    -
    - -
    - -
    +
    @@ -181,55 +109,65 @@

    Extract return codes from CmdStan

    from the CmdStan run(s). A return code of 0 indicates a successful run.

    -
    return_codes()
    - +
    +
    return_codes()
    +
    -

    Value

    +
    +

    Value

    + -

    An integer vector of return codes with length equal to the number of +

    An integer vector of return codes with length equal to the number of CmdStan runs (number of chains for MCMC and one otherwise).

    -

    See also

    - - - -

    Examples

    -
    # \dontrun{ -# example with return codes all zero -fit_mcmc <- cmdstanr_example("schools", method = "sample") -
    #> Warning: 138 of 4000 (3.0%) transitions ended with a divergence. -#> See https://mc-stan.org/misc/warnings for details.
    fit_mcmc$return_codes() # should be all zero -
    #> [1] 0 0 0 0
    -# example of non-zero return code (optimization fails for hierarchical model) -fit_opt <- cmdstanr_example("schools", method = "optimize") -
    #> Optimization terminated with error:
    #> Line search failed to achieve a sufficient decrease, no more progress can be made
    fit_opt$return_codes() # should be non-zero -
    #> [1] 1
    # } - -
    +
    + + +
    +

    Examples

    +
    # \dontrun{
    +# example with return codes all zero
    +fit_mcmc <- cmdstanr_example("schools", method = "sample")
    +#> Warning: 258 of 4000 (6.0%) transitions ended with a divergence.
    +#> See https://mc-stan.org/misc/warnings for details.
    +#> Warning: 2 of 4 chains had an E-BFMI less than 0.2.
    +#> See https://mc-stan.org/misc/warnings for details.
    +fit_mcmc$return_codes() # should be all zero
    +#> [1] 0 0 0 0
    +
    +# example of non-zero return code (optimization fails for hierarchical model)
    +fit_opt <- cmdstanr_example("schools", method = "optimize")
    +#> Optimization terminated with error: 
    +#>   Line search failed to achieve a sufficient decrease, no more progress can be made
    +fit_opt$return_codes() # should be non-zero
    +#> [1] 1
    +# }
    +
    +
    +
    +
    -

    - - + + diff --git a/docs/reference/fit-method-sampler_diagnostics.html b/docs/reference/fit-method-sampler_diagnostics.html index f2c07e2cd..f36572b42 100644 --- a/docs/reference/fit-method-sampler_diagnostics.html +++ b/docs/reference/fit-method-sampler_diagnostics.html @@ -1,78 +1,15 @@ - - - - - - - -Extract sampler diagnostics after MCMC — fit-method-sampler_diagnostics • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Extract sampler diagnostics after MCMC — fit-method-sampler_diagnostics • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    @@ -182,108 +110,112 @@

    Extract sampler diagnostics after MCMC

    Extract the values of sampler diagnostics for each iteration and chain of MCMC. To instead get summaries of these diagnostics and associated warning messages use the -$diagnostic_summary() method.

    +$diagnostic_summary() method.

    +
    + +
    +
    sampler_diagnostics(
    +  inc_warmup = FALSE,
    +  format = getOption("cmdstanr_draws_format", "draws_array")
    +)
    -
    sampler_diagnostics(
    -  inc_warmup = FALSE,
    -  format = getOption("cmdstanr_draws_format", "draws_array")
    -)
    +
    +

    Arguments

    +
    inc_warmup
    +

    (logical) Should warmup draws be included? Defaults to FALSE.

    + -

    Arguments

    - - - - - - - - - - -
    inc_warmup

    (logical) Should warmup draws be included? Defaults to FALSE.

    format

    (string) The draws format to return. See -draws for details.

    +
    format
    +

    (string) The draws format to return. See +draws for details.

    -

    Value

    +
    +
    +

    Value

    + -

    Depends on format, but the default is a 3-D -draws_array object (iteration x chain x +

    Depends on format, but the default is a 3-D +draws_array object (iteration x chain x variable). The variables for Stan's default MCMC algorithm are "accept_stat__", "stepsize__", "treedepth__", "n_leapfrog__", "divergent__", "energy__".

    -

    See also

    - - - -

    Examples

    -
    # \dontrun{ -fit <- cmdstanr_example("logistic") -sampler_diagnostics <- fit$sampler_diagnostics() -str(sampler_diagnostics) -
    #> 'draws_array' num [1:1000, 1:4, 1:6] 3 2 2 2 3 1 1 2 2 2 ... -#> - attr(*, "dimnames")=List of 3 -#> ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... -#> ..$ chain : chr [1:4] "1" "2" "3" "4" -#> ..$ variable : chr [1:6] "treedepth__" "divergent__" "energy__" "accept_stat__" ...
    -library(posterior) -as_draws_df(sampler_diagnostics) -
    #> # A draws_df: 1000 iterations, 4 chains, and 6 variables -#> treedepth__ divergent__ energy__ accept_stat__ stepsize__ n_leapfrog__ -#> 1 3 0 68 1.00 0.87 7 -#> 2 2 0 73 0.60 0.87 7 -#> 3 2 0 72 1.00 0.87 3 -#> 4 2 0 67 0.97 0.87 3 -#> 5 3 0 68 1.00 0.87 7 -#> 6 1 0 68 0.82 0.87 3 -#> 7 1 0 70 0.57 0.87 3 -#> 8 2 0 67 1.00 0.87 3 -#> 9 2 0 67 0.82 0.87 3 -#> 10 2 0 67 0.98 0.87 3 -#> # ... with 3990 more draws -#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    -# or specify format to get a data frame instead of calling as_draws_df -fit$sampler_diagnostics(format = "df") -
    #> # A draws_df: 1000 iterations, 4 chains, and 6 variables -#> treedepth__ divergent__ energy__ accept_stat__ stepsize__ n_leapfrog__ -#> 1 3 0 68 1.00 0.87 7 -#> 2 2 0 73 0.60 0.87 7 -#> 3 2 0 72 1.00 0.87 3 -#> 4 2 0 67 0.97 0.87 3 -#> 5 3 0 68 1.00 0.87 7 -#> 6 1 0 68 0.82 0.87 3 -#> 7 1 0 70 0.57 0.87 3 -#> 8 2 0 67 1.00 0.87 3 -#> 9 2 0 67 0.82 0.87 3 -#> 10 2 0 67 0.98 0.87 3 -#> # ... with 3990 more draws -#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    # } +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +fit <- cmdstanr_example("logistic")
    +sampler_diagnostics <- fit$sampler_diagnostics()
    +str(sampler_diagnostics)
    +#>  'draws_array' num [1:1000, 1:4, 1:6] 1 3 2 2 2 3 2 2 3 2 ...
    +#>  - attr(*, "dimnames")=List of 3
    +#>   ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
    +#>   ..$ chain    : chr [1:4] "1" "2" "3" "4"
    +#>   ..$ variable : chr [1:6] "treedepth__" "divergent__" "energy__" "accept_stat__" ...
    +
    +library(posterior)
    +as_draws_df(sampler_diagnostics)
    +#> # A draws_df: 1000 iterations, 4 chains, and 6 variables
    +#>    treedepth__ divergent__ energy__ accept_stat__ stepsize__ n_leapfrog__
    +#> 1            1           0       66          0.91       0.67            3
    +#> 2            3           0       66          0.87       0.67            7
    +#> 3            2           0       72          0.65       0.67            3
    +#> 4            2           0       70          1.00       0.67            3
    +#> 5            2           0       66          1.00       0.67            3
    +#> 6            3           0       65          0.93       0.67            7
    +#> 7            2           0       69          0.70       0.67            3
    +#> 8            2           0       70          0.99       0.67            3
    +#> 9            3           0       66          0.98       0.67            7
    +#> 10           2           0       67          0.90       0.67            3
    +#> # ... with 3990 more draws
    +#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    +
    +# or specify format to get a data frame instead of calling as_draws_df
    +fit$sampler_diagnostics(format = "df")
    +#> # A draws_df: 1000 iterations, 4 chains, and 6 variables
    +#>    treedepth__ divergent__ energy__ accept_stat__ stepsize__ n_leapfrog__
    +#> 1            1           0       66          0.91       0.67            3
    +#> 2            3           0       66          0.87       0.67            7
    +#> 3            2           0       72          0.65       0.67            3
    +#> 4            2           0       70          1.00       0.67            3
    +#> 5            2           0       66          1.00       0.67            3
    +#> 6            3           0       65          0.93       0.67            7
    +#> 7            2           0       69          0.70       0.67            3
    +#> 8            2           0       70          0.99       0.67            3
    +#> 9            3           0       66          0.98       0.67            7
    +#> 10           2           0       67          0.90       0.67            3
    +#> # ... with 3990 more draws
    +#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/fit-method-save_object.html b/docs/reference/fit-method-save_object.html index ef36b1b08..e721424a6 100644 --- a/docs/reference/fit-method-save_object.html +++ b/docs/reference/fit-method-save_object.html @@ -1,79 +1,16 @@ - - - - - - - -Save fitted model object to a file — fit-method-save_object • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Save fitted model object to a file — fit-method-save_object • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    -

    This method is a wrapper around base::saveRDS() that ensures +

    This method is a wrapper around base::saveRDS() that ensures that all posterior draws and diagnostics are saved when saving a fitted model object. Because the contents of the CmdStan output CSV files are only read into R lazily (i.e., as needed), the $save_object() method is the safest way to guarantee that everything has been read in before saving.

    -
    save_object(file, ...)
    - -

    Arguments

    - - - - - - - - - - -
    file

    (string) Path where the file should be saved.

    ...

    Other arguments to pass to base::saveRDS() besides object and file.

    - -

    See also

    +
    +
    save_object(file, ...)
    +
    - +
    +

    Arguments

    +
    file
    +

    (string) Path where the file should be saved.

    -

    Examples

    -
    # \dontrun{ -fit <- cmdstanr_example("logistic") -temp_rds_file <- tempfile(fileext = ".RDS") -fit$save_object(file = temp_rds_file) -rm(fit) +
    ...
    +

    Other arguments to pass to base::saveRDS() besides object and file.

    -fit <- readRDS(temp_rds_file) -fit$summary() -
    #> # A tibble: 105 × 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -65.9 -65.6 1.43 1.20 -68.8 -64.3 1.00 2291. -#> 2 alpha 0.379 0.380 0.216 0.220 0.0214 0.735 1.00 4664. -#> 3 beta[1] -0.668 -0.663 0.252 0.250 -1.09 -0.259 1.00 4187. -#> 4 beta[2] -0.273 -0.265 0.225 0.224 -0.646 0.0856 1.00 3736. -#> 5 beta[3] 0.682 0.679 0.261 0.258 0.260 1.10 1.00 3626. -#> 6 log_lik[1] -0.515 -0.509 0.0979 0.0970 -0.683 -0.366 1.00 4695. -#> 7 log_lik[2] -0.401 -0.380 0.144 0.136 -0.666 -0.200 1.00 4367. -#> 8 log_lik[3] -0.496 -0.462 0.218 0.200 -0.899 -0.206 1.00 4031. -#> 9 log_lik[4] -0.449 -0.432 0.150 0.144 -0.722 -0.237 1.00 3885. -#> 10 log_lik[5] -1.18 -1.17 0.276 0.276 -1.68 -0.769 1.00 4020. -#> # … with 95 more rows, and 1 more variable: ess_tail <dbl>
    # } +
    + -
    +
    +

    Examples

    +
    # \dontrun{
    +fit <- cmdstanr_example("logistic")
    +
    +temp_rds_file <- tempfile(fileext = ".RDS")
    +fit$save_object(file = temp_rds_file)
    +rm(fit)
    +
    +fit <- readRDS(temp_rds_file)
    +fit$summary()
    +#> # A tibble: 105 × 10
    +#>    variable      mean  median     sd    mad       q5      q95  rhat ess_bulk
    +#>    <chr>        <num>   <num>  <num>  <num>    <num>    <num> <num>    <num>
    +#>  1 lp__       -66.0   -65.6   1.45   1.24   -68.8    -64.3     1.00    2217.
    +#>  2 alpha        0.386   0.382 0.217  0.215    0.0345   0.754   1.00    4227.
    +#>  3 beta[1]     -0.659  -0.650 0.246  0.246   -1.07    -0.268   1.00    3920.
    +#>  4 beta[2]     -0.275  -0.277 0.226  0.226   -0.654    0.0937  1.00    4340.
    +#>  5 beta[3]      0.675   0.665 0.275  0.267    0.236    1.15    1.00    3661.
    +#>  6 log_lik[1]  -0.514  -0.508 0.0974 0.0967  -0.683   -0.365   1.00    4222.
    +#>  7 log_lik[2]  -0.410  -0.389 0.154  0.150   -0.690   -0.198   1.00    4320.
    +#>  8 log_lik[3]  -0.500  -0.469 0.218  0.206   -0.908   -0.207   1.00    4270.
    +#>  9 log_lik[4]  -0.453  -0.436 0.152  0.146   -0.729   -0.238   1.00    3898.
    +#> 10 log_lik[5]  -1.18   -1.16  0.287  0.285   -1.68    -0.760   1.00    3941.
    +#> # ℹ 95 more rows
    +#> # ℹ 1 more variable: ess_tail <num>
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/fit-method-save_output_files.html b/docs/reference/fit-method-save_output_files.html index 196500301..9f5eee91a 100644 --- a/docs/reference/fit-method-save_output_files.html +++ b/docs/reference/fit-method-save_output_files.html @@ -1,82 +1,19 @@ - - - - - - - -Save output and data files — fit-method-save_output_files • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Save output and data files — fit-method-save_output_files • cmdstanr - - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    @@ -193,76 +121,77 @@

    Save output and data files

    the current file paths without moving any files.

    -
    save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
    +    
    +
    save_output_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
    +
    +save_latent_dynamics_files(
    +  dir = ".",
    +  basename = NULL,
    +  timestamp = TRUE,
    +  random = TRUE
    +)
    +
    +save_profile_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
    +
    +save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE)
    +
    +output_files(include_failed = FALSE)
    +
    +profile_files(include_failed = FALSE)
    +
    +latent_dynamics_files(include_failed = FALSE)
    +
    +data_file()
    +
    + +
    +

    Arguments

    +
    dir
    +

    (string) Path to directory where the files should be saved.

    -save_latent_dynamics_files( - dir = ".", - basename = NULL, - timestamp = TRUE, - random = TRUE -) -save_profile_files(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) +
    basename
    +

    (string) Base filename to use. See Details.

    -save_data_file(dir = ".", basename = NULL, timestamp = TRUE, random = TRUE) -output_files(include_failed = FALSE) +
    timestamp
    +

    (logical) Should a timestamp be added to the file name(s)? +Defaults to TRUE. See Details.

    -profile_files(include_failed = FALSE) -latent_dynamics_files(include_failed = FALSE) +
    random
    +

    (logical) Should random alphanumeric characters be added to the +end of the file name(s)? Defaults to TRUE. See Details.

    -data_file()
    -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    dir

    (string) Path to directory where the files should be saved.

    basename

    (string) Base filename to use. See Details.

    timestamp

    (logical) Should a timestamp be added to the file name(s)? -Defaults to TRUE. See Details.

    random

    (logical) Should random alphanumeric characters be added to the -end of the file name(s)? Defaults to TRUE. See Details.

    include_failed

    (logical) Should CmdStan runs that failed also be -included? The default is FALSE.

    +
    include_failed
    +

    (logical) Should CmdStan runs that failed also be +included? The default is FALSE.

    -

    Value

    +
    +
    +

    Value

    + -

    The $save_* methods print a message with the new file paths and (invisibly) +

    The $save_* methods print a message with the new file paths and (invisibly) return a character vector of the new paths (or NA for any that couldn't be copied). They also have the side effect of setting the internal paths in the fitted model object to the new paths.

    + +

    The methods without the save_ prefix return character vectors of file paths without moving any files.

    -

    Details

    - +
    +
    +

    Details

    For $save_output_files() the files moved to dir will have names of -the form basename-timestamp-id-random, where

      -
    • basename is the user's provided basename argument;

    • -
    • timestamp is of the form format(Sys.time(), "%Y%m%d%H%M");

    • +the form basename-timestamp-id-random, where

      • basename is the user's provided basename argument;

      • +
      • timestamp is of the form format(Sys.time(), "%Y%m%d%H%M");

      • id is the MCMC chain id (or 1 for non MCMC);

      • random contains six random alphanumeric characters;

      • -
      - -

      For $save_latent_dynamics_files() everything is the same as for +

    For $save_latent_dynamics_files() everything is the same as for $save_output_files() except "-diagnostic-" is included in the new file name after basename.

    For $save_profile_files() everything is the same as for @@ -270,64 +199,69 @@

    Details file name after basename.

    For $save_data_file() no id is included in the file name because even with multiple MCMC chains the data file is the same.

    -

    See also

    - - - -

    Examples

    -
    # \dontrun{ -fit <- cmdstanr_example() -fit$output_files() -
    #> [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181226-1-549ffb.csv" -#> [2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181226-2-549ffb.csv" -#> [3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181226-3-549ffb.csv" -#> [4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181226-4-549ffb.csv"
    fit$data_file() -
    #> [1] "/private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpBlSxPc/temp_libpathbc8d32dd446a/cmdstanr/logistic.data.json"
    -# just using tempdir for the example -my_dir <- tempdir() -fit$save_output_files(dir = my_dir, basename = "banana") -
    #> Moved 4 files and set internal paths to new locations: -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/banana-202203181226-1-89ddc2.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/banana-202203181226-2-89ddc2.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/banana-202203181226-3-89ddc2.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/banana-202203181226-4-89ddc2.csv
    fit$save_output_files(dir = my_dir, basename = "tomato", timestamp = FALSE) -
    #> Moved 4 files and set internal paths to new locations: -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/tomato-1-07c277.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/tomato-2-07c277.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/tomato-3-07c277.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/tomato-4-07c277.csv
    fit$save_output_files(dir = my_dir, basename = "lettuce", timestamp = FALSE, random = FALSE) -
    #> Moved 4 files and set internal paths to new locations: -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/lettuce-1.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/lettuce-2.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/lettuce-3.csv -#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/lettuce-4.csv
    # } +
    + -
    +
    +

    Examples

    +
    # \dontrun{
    +fit <- cmdstanr_example()
    +fit$output_files()
    +#> [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251436-1-3f09a2.csv"
    +#> [2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251436-2-3f09a2.csv"
    +#> [3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251436-3-3f09a2.csv"
    +#> [4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251436-4-3f09a2.csv"
    +fit$data_file()
    +#> [1] "/private/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpKFjP9Y/temp_libpath15d4242e55c52/cmdstanr/logistic.data.json"
    +
    +# just using tempdir for the example
    +my_dir <- tempdir()
    +fit$save_output_files(dir = my_dir, basename = "banana")
    +#> Moved 4 files and set internal paths to new locations:
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/banana-202307251436-1-1e5555.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/banana-202307251436-2-1e5555.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/banana-202307251436-3-1e5555.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/banana-202307251436-4-1e5555.csv
    +fit$save_output_files(dir = my_dir, basename = "tomato", timestamp = FALSE)
    +#> Moved 4 files and set internal paths to new locations:
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/tomato-1-092670.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/tomato-2-092670.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/tomato-3-092670.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/tomato-4-092670.csv
    +fit$save_output_files(dir = my_dir, basename = "lettuce", timestamp = FALSE, random = FALSE)
    +#> Moved 4 files and set internal paths to new locations:
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/lettuce-1.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/lettuce-2.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/lettuce-3.csv
    +#> - /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/lettuce-4.csv
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/fit-method-summary.html b/docs/reference/fit-method-summary.html index faf9864f2..5713a8b11 100644 --- a/docs/reference/fit-method-summary.html +++ b/docs/reference/fit-method-summary.html @@ -1,53 +1,5 @@ - - - - - - - -Compute a summary table of estimates and diagnostics — fit-method-summary • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Compute a summary table of estimates and diagnostics — fit-method-summary • cmdstanr - - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +

    The $summary() method runs -summarise_draws() from the posterior +summarise_draws() from the posterior package and returns the output. For MCMC, only post-warmup draws are included in the summary.

    There is also a $print() method that prints the same summary stats but @@ -199,130 +127,174 @@

    Compute a summary table of estimates and diagnostics

    to be able to return them to the user. See Examples.

    -
    summary(variables = NULL, ...)
    +
    +
    summary(variables = NULL, ...)
    +
    -

    Arguments

    - - - - - - - - - - -
    variables

    (character vector) The variables to include.

    ...

    Optional arguments to pass to posterior::summarise_draws().

    +
    +

    Arguments

    +
    variables
    +

    (character vector) The variables to include.

    -

    Value

    -

    The $summary() method returns the tibble data frame created by -posterior::summarise_draws().

    -

    The $print() method returns the fitted model object itself (invisibly), -which is the standard behavior for print methods in R.

    -

    See also

    +
    ...
    +

    Optional arguments to pass to posterior::summarise_draws().

    - +
    +
    +

    Value

    + -

    Examples

    -
    # \dontrun{ -fit <- cmdstanr_example("logistic") -fit$summary() -
    #> # A tibble: 105 × 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -65.9 -65.6 1.41 1.24 -68.7 -64.3 1.00 2184. -#> 2 alpha 0.376 0.378 0.218 0.220 0.00751 0.730 1.00 3633. -#> 3 beta[1] -0.663 -0.653 0.246 0.245 -1.09 -0.263 1.00 4022. -#> 4 beta[2] -0.269 -0.263 0.228 0.226 -0.649 0.107 1.00 3949. -#> 5 beta[3] 0.676 0.673 0.265 0.271 0.244 1.12 1.00 3975. -#> 6 log_lik[1] -0.517 -0.511 0.0992 0.0990 -0.692 -0.367 1.00 3507. -#> 7 log_lik[2] -0.404 -0.384 0.147 0.140 -0.665 -0.196 1.00 4695. -#> 8 log_lik[3] -0.498 -0.466 0.219 0.205 -0.906 -0.205 1.00 3889. -#> 9 log_lik[4] -0.453 -0.432 0.156 0.151 -0.746 -0.230 1.00 3814. -#> 10 log_lik[5] -1.18 -1.16 0.278 0.281 -1.66 -0.756 1.00 4117. -#> # … with 95 more rows, and 1 more variable: ess_tail <dbl>
    fit$print() -
    #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> lp__ -65.95 -65.64 1.41 1.24 -68.75 -64.27 1.00 2184 3071 -#> alpha 0.38 0.38 0.22 0.22 0.01 0.73 1.00 3633 3074 -#> beta[1] -0.66 -0.65 0.25 0.25 -1.09 -0.26 1.00 4021 2815 -#> beta[2] -0.27 -0.26 0.23 0.23 -0.65 0.11 1.00 3948 3214 -#> beta[3] 0.68 0.67 0.27 0.27 0.24 1.12 1.00 3974 3343 -#> log_lik[1] -0.52 -0.51 0.10 0.10 -0.69 -0.37 1.00 3506 2773 -#> log_lik[2] -0.40 -0.38 0.15 0.14 -0.67 -0.20 1.00 4694 3405 -#> log_lik[3] -0.50 -0.47 0.22 0.20 -0.91 -0.21 1.00 3889 3133 -#> log_lik[4] -0.45 -0.43 0.16 0.15 -0.75 -0.23 1.00 3814 3282 -#> log_lik[5] -1.18 -1.16 0.28 0.28 -1.66 -0.76 1.00 4117 3034 -#> -#> # showing 10 of 105 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
    fit$print(max_rows = 2) # same as print(fit, max_rows = 2) -
    #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> lp__ -65.95 -65.64 1.41 1.24 -68.75 -64.27 1.00 2184 3071 -#> alpha 0.38 0.38 0.22 0.22 0.01 0.73 1.00 3633 3074 -#> -#> # showing 2 of 105 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
    -# include only certain variables -fit$summary("beta") -
    #> # A tibble: 3 × 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 beta[1] -0.663 -0.653 0.246 0.245 -1.09 -0.263 1.00 4022. 2815. -#> 2 beta[2] -0.269 -0.263 0.228 0.226 -0.649 0.107 1.00 3949. 3215. -#> 3 beta[3] 0.676 0.673 0.265 0.271 0.244 1.12 1.00 3975. 3343.
    fit$print(c("alpha", "beta[2]")) -
    #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> alpha 0.38 0.38 0.22 0.22 0.01 0.73 1.00 3633 3074 -#> beta[2] -0.27 -0.26 0.23 0.23 -0.65 0.11 1.00 3948 3214
    -# include all variables but only certain summaries -fit$summary(NULL, c("mean", "sd")) -
    #> # A tibble: 105 × 3 -#> variable mean sd -#> <chr> <dbl> <dbl> -#> 1 lp__ -65.9 1.41 -#> 2 alpha 0.376 0.218 -#> 3 beta[1] -0.663 0.246 -#> 4 beta[2] -0.269 0.228 -#> 5 beta[3] 0.676 0.265 -#> 6 log_lik[1] -0.517 0.0992 -#> 7 log_lik[2] -0.404 0.147 -#> 8 log_lik[3] -0.498 0.219 -#> 9 log_lik[4] -0.453 0.156 -#> 10 log_lik[5] -1.18 0.278 -#> # … with 95 more rows
    -# can use functions created from formulas -# for example, calculate Pr(beta > 0) -fit$summary("beta", prob_gt_0 = ~ mean(. > 0)) -
    #> # A tibble: 3 × 2 -#> variable prob_gt_0 -#> <chr> <dbl> -#> 1 beta[1] 0.00175 -#> 2 beta[2] 0.116 -#> 3 beta[3] 0.997
    # } +

    The $summary() method returns the tibble data frame created by +posterior::summarise_draws().

    -
    + +

    The $print() method returns the fitted model object itself (invisibly), +which is the standard behavior for print methods in R.

    +
    + + +
    +

    Examples

    +
    # \dontrun{
    +fit <- cmdstanr_example("logistic")
    +fit$summary()
    +#> # A tibble: 105 × 10
    +#>    variable      mean  median     sd    mad       q5      q95  rhat ess_bulk
    +#>    <chr>        <num>   <num>  <num>  <num>    <num>    <num> <num>    <num>
    +#>  1 lp__       -66.0   -65.7   1.45   1.26   -68.9    -64.3     1.00    1950.
    +#>  2 alpha        0.380   0.376 0.218  0.214    0.0316   0.747   1.00    4585.
    +#>  3 beta[1]     -0.669  -0.662 0.254  0.252   -1.09    -0.248   1.00    4669.
    +#>  4 beta[2]     -0.277  -0.273 0.231  0.232   -0.665    0.0927  1.00    4400.
    +#>  5 beta[3]      0.676   0.677 0.268  0.267    0.242    1.12    1.00    4179.
    +#>  6 log_lik[1]  -0.515  -0.508 0.0986 0.0967  -0.686   -0.365   1.00    4439.
    +#>  7 log_lik[2]  -0.405  -0.387 0.146  0.140   -0.676   -0.197   1.00    4528.
    +#>  8 log_lik[3]  -0.501  -0.464 0.221  0.207   -0.917   -0.210   1.00    4266.
    +#>  9 log_lik[4]  -0.450  -0.429 0.155  0.148   -0.729   -0.229   1.00    4175.
    +#> 10 log_lik[5]  -1.18   -1.16  0.281  0.278   -1.67    -0.750   1.00    4417.
    +#> # ℹ 95 more rows
    +#> # ℹ 1 more variable: ess_tail <num>
    +fit$print()
    +#>    variable   mean median   sd  mad     q5    q95 rhat ess_bulk ess_tail
    +#>  lp__       -66.00 -65.68 1.45 1.26 -68.86 -64.27 1.00     1950     2912
    +#>  alpha        0.38   0.38 0.22 0.21   0.03   0.75 1.00     4584     2789
    +#>  beta[1]     -0.67  -0.66 0.25 0.25  -1.09  -0.25 1.00     4669     3265
    +#>  beta[2]     -0.28  -0.27 0.23 0.23  -0.67   0.09 1.00     4399     2567
    +#>  beta[3]      0.68   0.68 0.27 0.27   0.24   1.12 1.00     4179     3160
    +#>  log_lik[1]  -0.52  -0.51 0.10 0.10  -0.69  -0.37 1.00     4439     2680
    +#>  log_lik[2]  -0.40  -0.39 0.15 0.14  -0.68  -0.20 1.00     4528     3426
    +#>  log_lik[3]  -0.50  -0.46 0.22 0.21  -0.92  -0.21 1.00     4265     2821
    +#>  log_lik[4]  -0.45  -0.43 0.15 0.15  -0.73  -0.23 1.00     4175     2970
    +#>  log_lik[5]  -1.18  -1.16 0.28 0.28  -1.67  -0.75 1.00     4417     3002
    +#> 
    +#>  # showing 10 of 105 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
    +fit$print(max_rows = 2) # same as print(fit, max_rows = 2)
    +#>  variable   mean median   sd  mad     q5    q95 rhat ess_bulk ess_tail
    +#>     lp__  -66.00 -65.68 1.45 1.26 -68.86 -64.27 1.00     1950     2912
    +#>     alpha   0.38   0.38 0.22 0.21   0.03   0.75 1.00     4584     2789
    +#> 
    +#>  # showing 2 of 105 rows (change via 'max_rows' argument or 'cmdstanr_max_rows' option)
    +
    +# include only certain variables
    +fit$summary("beta")
    +#> # A tibble: 3 × 10
    +#>   variable   mean median    sd   mad     q5     q95  rhat ess_bulk ess_tail
    +#>   <chr>     <num>  <num> <num> <num>  <num>   <num> <num>    <num>    <num>
    +#> 1 beta[1]  -0.669 -0.662 0.254 0.252 -1.09  -0.248   1.00    4669.    3265.
    +#> 2 beta[2]  -0.277 -0.273 0.231 0.232 -0.665  0.0927  1.00    4400.    2568.
    +#> 3 beta[3]   0.676  0.677 0.268 0.267  0.242  1.12    1.00    4179.    3161.
    +fit$print(c("alpha", "beta[2]"))
    +#>  variable  mean median   sd  mad    q5  q95 rhat ess_bulk ess_tail
    +#>   alpha    0.38   0.38 0.22 0.21  0.03 0.75 1.00     4584     2789
    +#>   beta[2] -0.28  -0.27 0.23 0.23 -0.67 0.09 1.00     4399     2567
    +
    +# include all variables but only certain summaries
    +fit$summary(NULL, c("mean", "sd"))
    +#> # A tibble: 105 × 3
    +#>    variable      mean     sd
    +#>    <chr>        <num>  <num>
    +#>  1 lp__       -66.0   1.45  
    +#>  2 alpha        0.380 0.218 
    +#>  3 beta[1]     -0.669 0.254 
    +#>  4 beta[2]     -0.277 0.231 
    +#>  5 beta[3]      0.676 0.268 
    +#>  6 log_lik[1]  -0.515 0.0986
    +#>  7 log_lik[2]  -0.405 0.146 
    +#>  8 log_lik[3]  -0.501 0.221 
    +#>  9 log_lik[4]  -0.450 0.155 
    +#> 10 log_lik[5]  -1.18  0.281 
    +#> # ℹ 95 more rows
    +
    +# can use functions created from formulas
    +# for example, calculate Pr(beta > 0)
    +fit$summary("beta", prob_gt_0 = ~ mean(. > 0))
    +#> # A tibble: 3 × 2
    +#>   variable prob_gt_0
    +#>   <chr>        <num>
    +#> 1 beta[1]     0.0035
    +#> 2 beta[2]     0.112 
    +#> 3 beta[3]     0.995 
    +
    +# can combine user-specified functions with
    +# the default summary functions
    +fit$summary(variables = c("alpha", "beta"),
    +  posterior::default_summary_measures()[1:4],
    +  quantiles = ~ quantile2(., probs = c(0.025, 0.975)),
    +  posterior::default_convergence_measures()
    +  )
    +#> # A tibble: 4 × 10
    +#>   variable   mean median    sd   mad    q2.5  q97.5  rhat ess_bulk ess_tail
    +#>   <chr>     <num>  <num> <num> <num>   <num>  <num> <num>    <num>    <num>
    +#> 1 alpha     0.380  0.376 0.218 0.214 -0.0392  0.823  1.00    4585.    2790.
    +#> 2 beta[1]  -0.669 -0.662 0.254 0.252 -1.18   -0.180  1.00    4669.    3265.
    +#> 3 beta[2]  -0.277 -0.273 0.231 0.232 -0.736   0.165  1.00    4400.    2568.
    +#> 4 beta[3]   0.676  0.677 0.268 0.267  0.157   1.20   1.00    4179.    3161.
    +
    +# the functions need to calculate the appropriate
    +# value for a matrix input
    +fit$summary(variables = "alpha", dim)
    +#> # A tibble: 1 × 3
    +#>   variable dim.1 dim.2
    +#>   <chr>    <num> <num>
    +#> 1 alpha     1000     4
    +
    +# the usual [stats::var()] is therefore not directly suitable as it
    +# will produce a covariance matrix unless the data is converted to a vector
    +fit$print(c("alpha", "beta"), var2 = ~var(as.vector(.x)))
    +#>  variable var2
    +#>   alpha   0.05
    +#>   beta[1] 0.06
    +#>   beta[2] 0.05
    +#>   beta[3] 0.07
    +
    +# }
    +
    +
    +
    +
    -
    - - + + diff --git a/docs/reference/fit-method-time.html b/docs/reference/fit-method-time.html index dfba38e72..0f3ead7d4 100644 --- a/docs/reference/fit-method-time.html +++ b/docs/reference/fit-method-time.html @@ -1,77 +1,14 @@ - - - - - - - -Report timing of CmdStan runs — fit-method-time • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Report timing of CmdStan runs — fit-method-time • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    @@ -183,74 +111,77 @@

    Report timing of CmdStan runs

    sampling phases.

    -
    time()
    - +
    +
    time()
    +
    -

    Value

    +
    +

    Value

    + -

    A list with elements

      -
    • total: (scalar) The total run time. For MCMC this may be different than +

      A list with elements

      • total: (scalar) The total run time. For MCMC this may be different than the sum of the chain run times if parallelization was used.

      • chains: (data frame) For MCMC only, timing info for the individual chains. The data frame has columns "chain_id", "warmup", "sampling", and "total".

      • -
      - -

      See also

      - - - -

      Examples

      -
      # \dontrun{ -fit_mcmc <- cmdstanr_example("logistic", method = "sample") -fit_mcmc$time() -
      #> $total -#> [1] 0.8284969 -#> -#> $chains -#> chain_id warmup sampling total -#> 1 1 0.030 0.099 0.129 -#> 2 2 0.023 0.082 0.105 -#> 3 3 0.024 0.091 0.115 -#> 4 4 0.054 0.165 0.219 -#>
      -fit_mle <- cmdstanr_example("logistic", method = "optimize") -fit_mle$time() -
      #> $total -#> [1] 0.125402 -#>
      -fit_vb <- cmdstanr_example("logistic", method = "variational") -fit_vb$time() -
      #> $total -#> [1] 0.128371 -#>
      # } +
    + -
    +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$time()
    +#> $total
    +#> [1] 0.553201
    +#> 
    +#> $chains
    +#>   chain_id warmup sampling total
    +#> 1        1  0.021    0.068 0.089
    +#> 2        2  0.022    0.070 0.092
    +#> 3        3  0.022    0.076 0.098
    +#> 4        4  0.023    0.071 0.094
    +#> 
    +
    +fit_mle <- cmdstanr_example("logistic", method = "optimize")
    +fit_mle$time()
    +#> $total
    +#> [1] 0.1247039
    +#> 
    +
    +fit_vb <- cmdstanr_example("logistic", method = "variational")
    +fit_vb$time()
    +#> $total
    +#> [1] 0.1268399
    +#> 
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/fit-method-unconstrain_draws.html b/docs/reference/fit-method-unconstrain_draws.html new file mode 100644 index 000000000..1a27f650d --- /dev/null +++ b/docs/reference/fit-method-unconstrain_draws.html @@ -0,0 +1,185 @@ + +Transform all parameter draws to the unconstrained scale — fit-method-unconstrain_draws • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $unconstrain_draws() method transforms all parameter draws to the +unconstrained scale. The method returns a list for each chain, containing the parameter +values from each iteration on the unconstrained scale. If called with no arguments, then +the draws within the fit object are unconstrained. Alternatively, either an existing +draws object or a character vector of paths to CSV files can be passed.

    +
    + +
    +
    unconstrain_draws(files = NULL, draws = NULL)
    +
    + +
    +

    Arguments

    +
    files
    +

    (character vector) The paths to the CmdStan CSV files. These can +be files generated by running CmdStanR or running CmdStan directly.

    + + +
    draws
    +

    A posterior::draws_* object.

    + +
    + + +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$init_model_methods()
    +#> Error: Model methods cannot be used with a pre-compiled Stan executable, the model must be compiled again
    +
    +# Unconstrain all internal draws
    +unconstrained_internal_draws <- fit_mcmc$unconstrain_draws()
    +#> Error: The method has not been compiled, please call `init_model_methods()` first
    +
    +# Unconstrain external CmdStan CSV files
    +unconstrained_csv <- fit_mcmc$unconstrain_draws(files = fit_mcmc$output_files())
    +#> Error: The method has not been compiled, please call `init_model_methods()` first
    +
    +# Unconstrain existing draws object
    +unconstrained_draws <- fit_mcmc$unconstrain_draws(draws = fit_mcmc$draws())
    +#> Error: The method has not been compiled, please call `init_model_methods()` first
    +# }
    +
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/fit-method-unconstrain_variables.html b/docs/reference/fit-method-unconstrain_variables.html new file mode 100644 index 000000000..558713b7c --- /dev/null +++ b/docs/reference/fit-method-unconstrain_variables.html @@ -0,0 +1,165 @@ + +Transform a set of parameter values to the unconstrained scale — fit-method-unconstrain_variables • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $unconstrain_variables() method transforms input parameters to +the unconstrained scale.

    +
    + +
    +
    unconstrain_variables(variables)
    +
    + +
    +

    Arguments

    +
    variables
    +

    (list) A list of parameter values to transform, in the same +format as provided to the init argument of the $sample() method.

    + +
    + + +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$init_model_methods()
    +#> Error: Model methods cannot be used with a pre-compiled Stan executable, the model must be compiled again
    +fit_mcmc$unconstrain_variables(list(alpha = 0.5, beta = c(0.7, 1.1, 0.2)))
    +#> Error: The method has not been compiled, please call `init_model_methods()` first
    +# }
    +
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/fit-method-variable_skeleton.html b/docs/reference/fit-method-variable_skeleton.html new file mode 100644 index 000000000..0bc3100e1 --- /dev/null +++ b/docs/reference/fit-method-variable_skeleton.html @@ -0,0 +1,171 @@ + +Return the variable skeleton needed by the utils::relist function to re-structure a +vector of constrained parameter values to a named list — fit-method-variable_skeleton • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $variable_skeleton() method returns the variable skeleton.

    +
    + +
    +
    variable_skeleton(transformed_parameters = TRUE, generated_quantities = TRUE)
    +
    + +
    +

    Arguments

    +
    transformed_parameters
    +

    (boolean) Whether to include transformed parameters +in the skeleton (defaults to TRUE)

    + + +
    generated_quantities
    +

    (boolean) Whether to include generated quantities +in the skeleton (defaults to TRUE)

    + +
    + + +
    +

    Examples

    +
    # \dontrun{
    +fit_mcmc <- cmdstanr_example("logistic", method = "sample")
    +fit_mcmc$init_model_methods()
    +#> Error: Model methods cannot be used with a pre-compiled Stan executable, the model must be compiled again
    +fit_mcmc$variable_skeleton()
    +#> Error: The method has not been compiled, please call `init_model_methods()` first
    +# }
    +
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/index.html b/docs/reference/index.html index cbf3081a9..7c5bfb855 100644 --- a/docs/reference/index.html +++ b/docs/reference/index.html @@ -1,74 +1,12 @@ - - - - - - - -Function reference • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Function reference • cmdstanr - - + + - - -
    -
    - -
    - -
    +
    - - - - - - - - - - -
    -

    Package description

    -

    An overview of the package and how it differs from RStan.

    + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - - - - - - + + - - - + + - - - + + - - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + - - - - - - - - - - - - - - - - - - - - - - + + + + - - - - - - - - - - - - - - -
    +

    Package description

    +

    An overview of the package and how it differs from RStan.

    -

    cmdstanr-package

    +
    +

    cmdstanr-package cmdstanr CmdStanR

    CmdStanR: the R interface to CmdStan

    -

    Installing and setting the path to CmdStan

    -

    Install CmdStan, assuming the necessary C++ toolchain.

    +
    +

    Installing and setting the path to CmdStan

    +

    Install CmdStan, assuming the necessary C++ toolchain.

    +

    install_cmdstan() rebuild_cmdstan() cmdstan_make_local() check_cmdstan_toolchain()

    Install CmdStan or clean and rebuild an existing installation

    +

    set_cmdstan_path() cmdstan_path() cmdstan_version()

    Get or set the file path to the CmdStan installation

    -

    Running CmdStan from R

    -

    Run CmdStan from R.

    +
    +

    Running CmdStan from R

    +

    Run CmdStan from R.

    +

    cmdstan_model()

    Create a new CmdStanModel object

    +

    CmdStanModel

    CmdStanModel objects

    +

    check_syntax()

    Check syntax of a Stan program

    +

    compile()

    Compile a Stan program

    +

    diagnose()

    Run Stan's diagnose method

    +
    +

    expose_functions()

    +

    Expose Stan functions to R

    format()

    Run stanc's auto-formatter on the model code.

    +

    generate_quantities()

    Run Stan's standalone generated quantities method

    +

    optimize()

    Run Stan's optimization algorithms

    +

    sample()

    Run Stan's MCMC algorithms

    +

    sample_mpi()

    Run Stan's MCMC algorithms with MPI

    +

    variables()

    Input and output variables of a Stan program

    +

    variational()

    Run Stan's variational approximation algorithms

    +

    cmdstanr_example() print_example_program()

    Fit models for use in examples

    -

    Fitted model objects and methods

    +
    +

    Fitted model objects and methods

    +

    CmdStanMCMC

    CmdStanMCMC objects

    +

    CmdStanMLE

    CmdStanMLE objects

    +

    CmdStanVB

    CmdStanVB objects

    +

    CmdStanGQ

    CmdStanGQ objects

    +

    CmdStanDiagnose

    CmdStanDiagnose objects

    +

    cmdstan_summary() cmdstan_diagnose()

    Run CmdStan's stansummary and diagnose utilities

    +

    code()

    Return Stan code

    +
    +

    constrain_variables()

    +

    Transform a set of unconstrained parameter values to the constrained scale

    diagnostic_summary()

    Sampler diagnostic summaries and warnings

    +

    draws()

    Extract posterior draws

    +
    +

    grad_log_prob()

    +

    Calculate the log-probability and the gradient w.r.t. each input for a +given vector of unconstrained parameters

    gradients()

    Extract gradients after diagnostic mode

    +
    +

    hessian()

    +

    Calculate the log-probability, the gradient w.r.t. each input, and the Hessian +for a given vector of unconstrained parameters

    init()

    Extract user-specified initial values

    +
    +

    init_model_methods()

    +

    Compile additional methods for accessing the model log-probability function +and parameter constraining and unconstraining.

    inv_metric()

    Extract inverse metric (mass matrix) after MCMC

    +
    +

    log_prob()

    +

    Calculate the log-probability given a provided vector of unconstrained parameters.

    loo()

    Leave-one-out cross-validation (LOO-CV)

    +

    lp() lp_approx()

    Extract log probability (target)

    +

    metadata()

    Extract metadata from CmdStan CSV files

    +

    mle()

    Extract (penalized) maximum likelihood estimate after optimization

    +

    num_chains()

    Extract number of chains after MCMC

    +

    output()

    Access console output

    +

    profiles()

    Return profiling data

    +

    return_codes()

    Extract return codes from CmdStan

    +

    sampler_diagnostics()

    Extract sampler diagnostics after MCMC

    +

    save_object()

    Save fitted model object to a file

    +

    save_output_files() save_latent_dynamics_files() save_profile_files() save_data_file() output_files() profile_files() latent_dynamics_files() data_file()

    Save output and data files

    +

    summary()

    Compute a summary table of estimates and diagnostics

    +

    time()

    Report timing of CmdStan runs

    -

    Other tools for working with CmdStan

    +
    +

    unconstrain_draws()

    +

    Transform all parameter draws to the unconstrained scale

    +

    unconstrain_variables()

    +

    Transform a set of parameter values to the unconstrained scale

    +

    variable_skeleton()

    +

    Return the variable skeleton needed by the utils::relist function to re-structure a +vector of constrained parameter values to a named list

    +

    expose_functions()

    +

    Expose Stan functions to R

    +

    Other tools for working with CmdStan

    +

    read_cmdstan_csv() as_cmdstan_fit()

    Read CmdStan CSV files into R

    +

    write_stan_json()

    Write data to a JSON file readable by CmdStan

    +

    write_stan_file()

    Write Stan code to a file

    +

    draws_to_csv()

    Write posterior draws objects to CSV files suitable for running standalone generated quantities with CmdStan.

    -

    Using CmdStanR with knitr and R Markdown

    +
    +

    as_mcmc.list()

    +

    Convert CmdStanMCMC to mcmc.list

    +

    as_draws(<CmdStanMCMC>) as_draws(<CmdStanMLE>) as_draws(<CmdStanVB>) as_draws(<CmdStanGQ>)

    +

    Create a draws object from a CmdStanR fitted model object

    +

    Using CmdStanR with knitr and R Markdown

    +

    register_knitr_engine()

    Register CmdStanR's knitr engine for Stan

    +

    eng_cmdstan()

    CmdStan knitr engine for Stan

    - +
    +
    -
    - - + + diff --git a/docs/reference/install_cmdstan.html b/docs/reference/install_cmdstan.html index 85948b739..5995ec079 100644 --- a/docs/reference/install_cmdstan.html +++ b/docs/reference/install_cmdstan.html @@ -1,53 +1,5 @@ - - - - - - - -Install CmdStan or clean and rebuild an existing installation — install_cmdstan • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Install CmdStan or clean and rebuild an existing installation — install_cmdstan • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +

    The install_cmdstan() function attempts to download and -install the latest release of CmdStan. +install the latest release of CmdStan. Installing a previous release or a new release candidate is also possible by specifying the version or release_url argument. See the first few sections of the CmdStan -installation guide +installation guide for details on the C++ toolchain required for installing CmdStan.

    The rebuild_cmdstan() function cleans and rebuilds the CmdStan installation. Use this function in case of any issues when compiling models.

    @@ -213,154 +141,161 @@

    Install CmdStan or clean and rebuild an existing installation

    be called directly by the user.

    -
    install_cmdstan(
    -  dir = NULL,
    -  cores = getOption("mc.cores", 2),
    -  quiet = FALSE,
    -  overwrite = FALSE,
    -  timeout = 1200,
    -  version = NULL,
    -  release_url = NULL,
    -  cpp_options = list(),
    -  check_toolchain = TRUE
    -)
    -
    -rebuild_cmdstan(
    -  dir = cmdstan_path(),
    -  cores = getOption("mc.cores", 2),
    -  quiet = FALSE,
    -  timeout = 600
    -)
    -
    -cmdstan_make_local(dir = cmdstan_path(), cpp_options = NULL, append = TRUE)
    -
    -check_cmdstan_toolchain(fix = FALSE, quiet = FALSE)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    dir

    (string) The path to the directory in which to install CmdStan. +

    +
    install_cmdstan(
    +  dir = NULL,
    +  cores = getOption("mc.cores", 2),
    +  quiet = FALSE,
    +  overwrite = FALSE,
    +  timeout = 1200,
    +  version = NULL,
    +  release_url = NULL,
    +  cpp_options = list(),
    +  check_toolchain = TRUE,
    +  wsl = FALSE
    +)
    +
    +rebuild_cmdstan(
    +  dir = cmdstan_path(),
    +  cores = getOption("mc.cores", 2),
    +  quiet = FALSE,
    +  timeout = 600
    +)
    +
    +cmdstan_make_local(dir = cmdstan_path(), cpp_options = NULL, append = TRUE)
    +
    +check_cmdstan_toolchain(fix = FALSE, quiet = FALSE)
    +
    + +
    +

    Arguments

    +
    dir
    +

    (string) The path to the directory in which to install CmdStan. The default is to install it in a directory called .cmdstan within the -user's home directory (i.e, file.path(Sys.getenv("HOME"), ".cmdstan")).

    cores

    (integer) The number of CPU cores to use to parallelize building +user's home directory (i.e., file.path(Sys.getenv("HOME"), ".cmdstan")).

    + + +
    cores
    +

    (integer) The number of CPU cores to use to parallelize building CmdStan and speed up installation. If cores is not specified then the default is to look for the option "mc.cores", which can be set for an -entire R session by options(mc.cores=value). If the "mc.cores" option -has not been set then the default is 2.

    quiet

    (logical) For install_cmdstan(), should the verbose output +entire R session by options(mc.cores=value). If the "mc.cores" option +has not been set then the default is 2.

    + + +
    quiet
    +

    (logical) For install_cmdstan(), should the verbose output from the system processes be suppressed when building the CmdStan binaries? The default is FALSE. For check_cmdstan_toolchain(), should the function suppress printing informational messages? The default is FALSE. -If TRUE only errors will be printed.

    overwrite

    (logical) Should CmdStan still be downloaded and installed +If TRUE only errors will be printed.

    + + +
    overwrite
    +

    (logical) Should CmdStan still be downloaded and installed even if an installation of the same version is found in dir? The default is FALSE, in which case an informative error is thrown instead of -overwriting the user's installation.

    timeout

    (positive real) Timeout (in seconds) for the build stage of -the installation.

    version

    (string) The CmdStan release version to install. The default +overwriting the user's installation.

    + + +
    timeout
    +

    (positive real) Timeout (in seconds) for the build stage of +the installation.

    + + +
    version
    +

    (string) The CmdStan release version to install. The default is NULL, which downloads the latest stable release from -https://github.com/stan-dev/cmdstan/releases.

    release_url

    (string) The URL for the specific CmdStan release or -release candidate to install. See https://github.com/stan-dev/cmdstan/releases. +https://github.com/stan-dev/cmdstan/releases.

    + + +
    release_url
    +

    (string) The URL for the specific CmdStan release or +release candidate to install. See https://github.com/stan-dev/cmdstan/releases. The URL should point to the tarball (.tar.gz file) itself, e.g., release_url="https://github.com/stan-dev/cmdstan/releases/download/v2.25.0/cmdstan-2.25.0.tar.gz". -If both version and release_url are specified then version will be used.

    cpp_options

    (list) Any makefile flags/variables to be written to -the make/local file. For example, list("CXX" = "clang++") will force -the use of clang for compilation.

    check_toolchain

    (logical) Should install_cmdstan() attempt to check +If both version and release_url are specified then version will be used.

    + + +
    cpp_options
    +

    (list) Any makefile flags/variables to be written to +the make/local file. For example, list("CXX" = "clang++") will force +the use of clang for compilation.

    + + +
    check_toolchain
    +

    (logical) Should install_cmdstan() attempt to check that the required toolchain is installed and properly configured. The -default is TRUE.

    append

    (logical) For cmdstan_make_local(), should the listed +default is TRUE.

    + + +
    wsl
    +

    (logical) Should CmdStan be installed and run through the Windows +Subsystem for Linux (WSL). The default is FALSE.

    + + +
    append
    +

    (logical) For cmdstan_make_local(), should the listed makefile flags be appended to the end of the existing make/local file? -The default is TRUE. If FALSE the file is overwritten.

    fix

    For check_cmdstan_toolchain(), should CmdStanR attempt to fix +The default is TRUE. If FALSE the file is overwritten.

    + + +
    fix
    +

    For check_cmdstan_toolchain(), should CmdStanR attempt to fix any detected toolchain problems? Currently this option is only available on Windows. The default is FALSE, in which case problems are only reported -along with suggested fixes.

    +along with suggested fixes.

    -

    Value

    +
    +
    +

    Value

    + -

    For cmdstan_make_local(), if cpp_options=NULL then the existing +

    For cmdstan_make_local(), if cpp_options=NULL then the existing contents of make/local are returned without writing anything, otherwise the updated contents are returned.

    +
    -

    Examples

    -
    # \dontrun{ -check_cmdstan_toolchain() -
    #> The C++ toolchain required for CmdStan is setup properly!
    -# install_cmdstan(cores = 4) - -cpp_options <- list( - "CXX" = "clang++", - "CXXFLAGS+= -march=native", - PRECOMPILED_HEADERS = TRUE -) -# cmdstan_make_local(cpp_options = cpp_options) -# rebuild_cmdstan() -# } - -
    +
    +

    Examples

    +
    # \dontrun{
    +check_cmdstan_toolchain()
    +#> The C++ toolchain required for CmdStan is setup properly!
    +
    +# install_cmdstan(cores = 4)
    +
    +cpp_options <- list(
    +  "CXX" = "clang++",
    +  "CXXFLAGS+= -march=native",
    +  PRECOMPILED_HEADERS = TRUE
    +)
    +# cmdstan_make_local(cpp_options = cpp_options)
    +# rebuild_cmdstan()
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-check_syntax.html b/docs/reference/model-method-check_syntax.html index 933874799..1d321677e 100644 --- a/docs/reference/model-method-check_syntax.html +++ b/docs/reference/model-method-check_syntax.html @@ -1,77 +1,14 @@ - - - - - - - -Check syntax of a Stan program — model-method-check_syntax • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Check syntax of a Stan program — model-method-check_syntax • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    -

    The $check_syntax() method of a CmdStanModel object +

    The $check_syntax() method of a CmdStanModel object checks the Stan program for syntax errors and returns TRUE (invisibly) if parsing succeeds. If invalid syntax is found an error is thrown.

    -
    check_syntax(
    -  pedantic = FALSE,
    -  include_paths = NULL,
    -  stanc_options = list(),
    -  quiet = FALSE
    -)
    +
    +
    check_syntax(
    +  pedantic = FALSE,
    +  include_paths = NULL,
    +  stanc_options = list(),
    +  quiet = FALSE
    +)
    +
    -

    Arguments

    - - - - - - - - - - - - - - - - - - -
    pedantic

    (logical) Should pedantic mode be turned on? The default is +

    +

    Arguments

    +
    pedantic
    +

    (logical) Should pedantic mode be turned on? The default is FALSE. Pedantic mode attempts to warn you about potential issues in your -Stan program beyond syntax errors. For details see the Pedantic mode chapter in -the Stan Reference Manual.

    include_paths

    (character vector) Paths to directories where Stan +Stan program beyond syntax errors. For details see the Pedantic mode chapter in +the Stan Reference Manual.

    + + +
    include_paths
    +

    (character vector) Paths to directories where Stan should look for files specified in #include directives in the Stan -program.

    stanc_options

    (list) Any other Stan-to-C++ transpiler options to be +program.

    + + +
    stanc_options
    +

    (list) Any other Stan-to-C++ transpiler options to be used when compiling the model. See the documentation for the -$compile() method for details.

    quiet

    (logical) Should informational messages be suppressed? The +$compile() method for details.

    + + +
    quiet
    +

    (logical) Should informational messages be suppressed? The default is FALSE, which will print a message if the Stan program is valid or the compiler error message if there are syntax errors. If TRUE, only -the error message will be printed.

    +the error message will be printed.

    -

    Value

    +
    +
    +

    Value

    + -

    The $check_syntax() method returns TRUE (invisibly) if the model +

    The $check_syntax() method returns TRUE (invisibly) if the model is valid.

    -

    See also

    - -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online +

    +
    +

    See also

    +

    The CmdStanR website +(mc-stan.org/cmdstanr) for online documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    - -

    Other CmdStanModel methods: -model-method-compile, -model-method-diagnose, -model-method-format, -model-method-generate-quantities, -model-method-optimize, -model-method-sample_mpi, -model-method-sample, -model-method-variables, -model-method-variational

    - -

    Examples

    -
    # \dontrun{ -file <- write_stan_file(" -data { - int N; - int y[N]; -} -parameters { - // should have <lower=0> but omitting to demonstrate pedantic mode - real lambda; -} -model { - y ~ poisson(lambda); -} -") -mod <- cmdstan_model(file, compile = FALSE) - -# the program is syntactically correct, however... -mod$check_syntax() -
    #> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/model_74b584fd23ea8c78eceda86d1d425fe8.stan', line 4, column 2: Declaration -#> of arrays by placing brackets after a variable name is deprecated and -#> will be removed in Stan 2.32.0. Instead use the array keyword before the -#> type. This can be changed automatically using the auto-format flag to -#> stanc
    #> Stan program is syntactically correct
    -# pedantic mode will warn that lambda should be constrained to be positive -# and that lambda has no prior distribution -mod$check_syntax(pedantic = TRUE) -
    #> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/model_74b584fd23ea8c78eceda86d1d425fe8.stan', line 4, column 2: Declaration -#> of arrays by placing brackets after a variable name is deprecated and -#> will be removed in Stan 2.32.0. Instead use the array keyword before the -#> type. This can be changed automatically using the auto-format flag to -#> stanc -#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/model_74b584fd23ea8c78eceda86d1d425fe8.stan', line 11, column 14: A -#> poisson distribution is given parameter lambda as a rate parameter -#> (argument 1), but lambda was not constrained to be strictly positive. -#> Warning: The parameter lambda has no priors.
    #> Stan program is syntactically correct
    +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +file <- write_stan_file("
    +data {
    +  int N;
    +  int y[N];
    +}
    +parameters {
    +  // should have <lower=0> but omitting to demonstrate pedantic mode
    +  real lambda;
    +}
    +model {
    +  y ~ poisson(lambda);
    +}
    +")
    +mod <- cmdstan_model(file, compile = FALSE)
    +
    +# the program is syntactically correct, however...
    +mod$check_syntax()
    +#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/model_74b584fd23ea8c78eceda86d1d425fe8.stan', line 4, column 2: Declaration
    +#>     of arrays by placing brackets after a variable name is deprecated and
    +#>     will be removed in Stan 2.33.0. Instead use the array keyword before the
    +#>     type. This can be changed automatically using the auto-format flag to
    +#>     stanc
    +#> Stan program is syntactically correct
    +
    +# pedantic mode will warn that lambda should be constrained to be positive
    +# and that lambda has no prior distribution
    +mod$check_syntax(pedantic = TRUE)
    +#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/model_74b584fd23ea8c78eceda86d1d425fe8.stan', line 4, column 2: Declaration
    +#>     of arrays by placing brackets after a variable name is deprecated and
    +#>     will be removed in Stan 2.33.0. Instead use the array keyword before the
    +#>     type. This can be changed automatically using the auto-format flag to
    +#>     stanc
    +#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/model_74b584fd23ea8c78eceda86d1d425fe8.stan', line 11, column 14: A
    +#>     poisson distribution is given parameter lambda as a rate parameter
    +#>     (argument 1), but lambda was not constrained to be strictly positive.
    +#> Warning: The parameter lambda has no priors. This means either no prior is
    +#>     provided, or the prior(s) depend on data variables. In the later case,
    +#>     this may be a false positive.
    +#> Stan program is syntactically correct
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-compile.html b/docs/reference/model-method-compile.html index b0c7f4429..63b0fa515 100644 --- a/docs/reference/model-method-compile.html +++ b/docs/reference/model-method-compile.html @@ -1,53 +1,5 @@ - - - - - - - -Compile a Stan program — model-method-compile • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Compile a Stan program — model-method-compile • cmdstanr - - - - - - - - - - - + + - - - -
    -
    - -
    - -
    +
    -

    The $compile() method of a CmdStanModel object checks the +

    The $compile() method of a CmdStanModel object checks the syntax of the Stan program, translates the program to C++, and creates a compiled executable. To just check the syntax of a Stan program without -compiling it use the $check_syntax() method +compiling it use the $check_syntax() method instead.

    In most cases the user does not need to explicitly call the $compile() -method as compilation will occur when calling cmdstan_model(). However it -is possible to set compile=FALSE in the call to cmdstan_model() and +method as compilation will occur when calling cmdstan_model(). However it +is possible to set compile=FALSE in the call to cmdstan_model() and subsequently call the $compile() method directly.

    After compilation, the paths to the executable and the .hpp file containing the generated C++ code are available via the $exe_file() and @@ -207,167 +135,195 @@

    Compile a Stan program

    $save_hpp_file(dir).

    -
    compile(
    -  quiet = TRUE,
    -  dir = NULL,
    -  pedantic = FALSE,
    -  include_paths = NULL,
    -  user_header = NULL,
    -  cpp_options = list(),
    -  stanc_options = list(),
    -  force_recompile = getOption("cmdstanr_force_recompile", default = FALSE),
    -  threads = FALSE
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    quiet

    (logical) Should the verbose output from CmdStan during +

    +
    compile(
    +  quiet = TRUE,
    +  dir = NULL,
    +  pedantic = FALSE,
    +  include_paths = NULL,
    +  user_header = NULL,
    +  cpp_options = list(),
    +  stanc_options = list(),
    +  force_recompile = getOption("cmdstanr_force_recompile", default = FALSE),
    +  compile_model_methods = FALSE,
    +  compile_hessian_method = FALSE,
    +  compile_standalone = FALSE,
    +  threads = FALSE
    +)
    +
    + +
    +

    Arguments

    +
    quiet
    +

    (logical) Should the verbose output from CmdStan during compilation be suppressed? The default is TRUE, but if you encounter an error we recommend trying again with quiet=FALSE to see more of the -output.

    dir

    (string) The path to the directory in which to store the CmdStan +output.

    + + +
    dir
    +

    (string) The path to the directory in which to store the CmdStan executable (or .hpp file if using $save_hpp_file()). The default is the -same location as the Stan program.

    pedantic

    (logical) Should pedantic mode be turned on? The default is +same location as the Stan program.

    + + +
    pedantic
    +

    (logical) Should pedantic mode be turned on? The default is FALSE. Pedantic mode attempts to warn you about potential issues in your -Stan program beyond syntax errors. For details see the Pedantic mode chapter in +Stan program beyond syntax errors. For details see the Pedantic mode chapter in the Stan Reference Manual. Note: to do a pedantic check for a model without compiling it or for a model that is already compiled the -$check_syntax() method can be used instead.

    include_paths

    (character vector) Paths to directories where Stan +$check_syntax() method can be used instead.

    + + +
    include_paths
    +

    (character vector) Paths to directories where Stan should look for files specified in #include directives in the Stan -program.

    user_header

    (string) The path to a C++ file (with a .hpp extension) -to compile with the Stan model.

    cpp_options

    (list) Any makefile options to be used when compiling the +program.

    + + +
    user_header
    +

    (string) The path to a C++ file (with a .hpp extension) +to compile with the Stan model.

    + + +
    cpp_options
    +

    (list) Any makefile options to be used when compiling the model (STAN_THREADS, STAN_MPI, STAN_OPENCL, etc.). Anything you would otherwise write in the make/local file. For an example of using threading see the Stan case study -Reduce Sum: A Minimal Example.

    stanc_options

    (list) Any Stan-to-C++ transpiler options to be used +Reduce Sum: A Minimal Example.

    + + +
    stanc_options
    +

    (list) Any Stan-to-C++ transpiler options to be used when compiling the model. See the Examples section below as well as the stanc chapter of the CmdStan Guide for more details on available options: -https://mc-stan.org/docs/cmdstan-guide/stanc.html.

    force_recompile

    (logical) Should the model be recompiled even if was +https://mc-stan.org/docs/cmdstan-guide/stanc.html.

    + + +
    force_recompile
    +

    (logical) Should the model be recompiled even if it was not modified since last compiled. The default is FALSE. Can also be set -via a global cmdstanr_force_recompile option.

    threads

    Deprecated and will be removed in a future release. Please -turn on threading via cpp_options = list(stan_threads = TRUE) instead.

    - -

    Value

    - -

    The $compile() method is called for its side effect of creating the -executable and adding its path to the CmdStanModel object, but it also -returns the CmdStanModel object invisibly.

    -

    After compilation, the $exe_file(), $hpp_file(), and $save_hpp_file() -methods can be used and return file paths.

    -

    See also

    - -

    The $check_syntax() method to check +via a global cmdstanr_force_recompile option.

    + + +
    compile_model_methods
    +

    (logical) Compile additional model methods +(log_prob(), grad_log_prob(), constrain_variables(), +unconstrain_variables()).

    + + +
    compile_hessian_method
    +

    (logical) Should the (experimental) hessian() method be +compiled with the model methods?

    + + +
    compile_standalone
    +

    (logical) Should functions in the Stan model be +compiled for use in R? If TRUE the functions will be available via the +functions field in the compiled model object. This can also be done after +compilation using the +$expose_functions() method.

    + + +
    threads
    +

    Deprecated and will be removed in a future release. Please +turn on threading via cpp_options = list(stan_threads = TRUE) instead.

    + +
    +
    +

    Value

    + + +

    The $compile() method is called for its side effect of creating the +executable and adding its path to the CmdStanModel object, but it also +returns the CmdStanModel object invisibly.

    + + +

    After compilation, the $exe_file(), $hpp_file(), and $save_hpp_file()

    + + +

    methods can be used and return file paths.

    +
    +
    +

    See also

    +

    The $check_syntax() method to check Stan syntax or enable pedantic mode without compiling.

    The CmdStanR website -(mc-stan.org/cmdstanr) for online +(mc-stan.org/cmdstanr) for online documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    - -

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-diagnose, -model-method-format, -model-method-generate-quantities, -model-method-optimize, -model-method-sample_mpi, -model-method-sample, -model-method-variables, -model-method-variational

    - -

    Examples

    -
    # \dontrun{ -file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan") - -# by default compilation happens when cmdstan_model() is called. -# to delay compilation until calling the $compile() method set compile=FALSE -mod <- cmdstan_model(file, compile = FALSE) -mod$compile() -mod$exe_file() -
    #> [1] "/Users/jgabry/.cmdstan/cmdstan-2.29.1/examples/bernoulli/bernoulli"
    -# turn on threading support (for using functions that support within-chain parallelization) -mod$compile(force_recompile = TRUE, cpp_options = list(stan_threads = TRUE)) -mod$exe_file() -
    #> [1] "/Users/jgabry/.cmdstan/cmdstan-2.29.1/examples/bernoulli/bernoulli"
    -# turn on pedantic mode (new in Stan v2.24) -file_pedantic <- write_stan_file(" -parameters { - real sigma; // pedantic mode will warn about missing <lower=0> -} -model { - sigma ~ exponential(1); -} -") -mod <- cmdstan_model(file_pedantic, pedantic = TRUE) -
    #> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/model-d65457acfdf5.stan', line 6, column 2: Parameter -#> sigma is given a exponential distribution, which has strictly positive -#> support, but sigma was not constrained to be strictly positive.
    -# } - -
    +

    The Stan and CmdStan documentation:

    Other CmdStanModel methods: +model-method-check_syntax, +model-method-diagnose, +model-method-expose_functions, +model-method-format, +model-method-generate-quantities, +model-method-optimize, +model-method-sample_mpi, +model-method-sample, +model-method-variables, +model-method-variational

    +
    + +
    +

    Examples

    +
    # \dontrun{
    +file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan")
    +
    +# by default compilation happens when cmdstan_model() is called.
    +# to delay compilation until calling the $compile() method set compile=FALSE
    +mod <- cmdstan_model(file, compile = FALSE)
    +mod$compile()
    +mod$exe_file()
    +#> [1] "/Users/jgabry/.cmdstan/cmdstan-2.32.2/examples/bernoulli/bernoulli"
    +
    +# turn on threading support (for using functions that support within-chain parallelization)
    +mod$compile(force_recompile = TRUE, cpp_options = list(stan_threads = TRUE))
    +mod$exe_file()
    +#> [1] "/Users/jgabry/.cmdstan/cmdstan-2.32.2/examples/bernoulli/bernoulli"
    +
    +# turn on pedantic mode (new in Stan v2.24)
    +file_pedantic <- write_stan_file("
    +parameters {
    +  real sigma;  // pedantic mode will warn about missing <lower=0>
    +}
    +model {
    +  sigma ~ exponential(1);
    +}
    +")
    +mod <- cmdstan_model(file_pedantic, pedantic = TRUE)
    +#> Warning in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/model-1621885c67f.stan', line 6, column 2: Parameter
    +#>     sigma is given a exponential distribution, which has strictly positive
    +#>     support, but sigma was not constrained to be strictly positive.
    +
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-diagnose.html b/docs/reference/model-method-diagnose.html index 602c34747..548b5959b 100644 --- a/docs/reference/model-method-diagnose.html +++ b/docs/reference/model-method-diagnose.html @@ -1,79 +1,16 @@ - - - - - - - -Run Stan's diagnose method — model-method-diagnose • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run Stan's diagnose method — model-method-diagnose • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    -

    The $diagnose() method of a CmdStanModel object +

    The $diagnose() method of a CmdStanModel object runs Stan's basic diagnostic feature that will calculate the gradients of the initial state and compare them with gradients calculated by finite differences. Discrepancies between the two indicate that there is a problem with the model or initial states or else there is a bug in Stan.

    -
    diagnose(
    -  data = NULL,
    -  seed = NULL,
    -  init = NULL,
    -  output_dir = NULL,
    -  output_basename = NULL,
    -  epsilon = NULL,
    -  error = NULL
    -)
    +
    +
    diagnose(
    +  data = NULL,
    +  seed = NULL,
    +  init = NULL,
    +  output_dir = NULL,
    +  output_basename = NULL,
    +  epsilon = NULL,
    +  error = NULL
    +)
    +
    -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    data

    (multiple options) The data to use for the variables specified in -the data block of the Stan program. One of the following:

      -
    • A named list of R objects with the names corresponding to variables +

      +

      Arguments

      +
      data
      +

      (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

      • A named list of R objects with the names corresponding to variables declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using write_stan_json(). See -write_stan_json() for details on the conversions performed on R objects +written to JSON for CmdStan using write_stan_json(). See +write_stan_json() for details on the conversions performed on R objects before they are passed to Stan.

      • A path to a data file compatible with CmdStan (JSON or R dump). See the appendices in the CmdStan guide for details on using these formats.

      • NULL or an empty list if the Stan program has no data block.

      • -
    seed

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. + + + +

    seed
    +

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single seed will automatically be augmented by the run (chain) ID so that each chain uses a different seed. The exception is the transformed data block, which defaults to @@ -224,19 +151,18 @@

    Arg chains if RNG functions are used. The only time seed should be specified as a vector (one element per chain) is if RNG functions are used in transformed data and the goal is to generate different data for each -chain.

    init

    (multiple options) The initialization method to use for the +chain.

    + + +
    init
    +

    (multiple options) The initialization method to use for the variables declared in the parameters block of the Stan program. One of -the following:

      -
    • A real number x>0. This initializes all parameters randomly between +the following:

      • A real number x>0. This initializes all parameters randomly between [-x,x] on the unconstrained parameter space;

      • The number 0. This initializes all parameters to 0;

      • A character vector of paths (one per chain) to JSON or Rdump files containing initial values for all or some parameters. See -write_stan_json() to write R objects to JSON files compatible with +write_stan_json() to write R objects to JSON files compatible with CmdStan.

      • A list of lists containing initial values for all or some parameters. For MCMC the list should contain a sublist for each chain. For optimization and @@ -249,105 +175,104 @@

        Arg has argument chain_id it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See Examples.

      • -
    output_dir

    (string) A path to a directory where CmdStan should write + + + +

    output_dir
    +

    (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at NULL (temporary directory) since CmdStanR makes the CmdStan output (posterior draws and diagnostics) available in R via methods of the fitted -model objects. The behavior of output_dir is as follows:

      -
    • If NULL (the default), then the CSV files are written to a temporary +model objects. The behavior of output_dir is as follows:

      • If NULL (the default), then the CSV files are written to a temporary directory and only saved permanently if the user calls one of the $save_* methods of the fitted model object (e.g., -$save_output_files()). These temporary +$save_output_files()). These temporary files are removed when the fitted model object is -garbage collected (manually or automatically).

      • +garbage collected (manually or automatically).

      • If a path, then the files are created in output_dir with names corresponding to the defaults used by $save_output_files().

      • -
    output_basename

    (string) A string to use as a prefix for the names of + + + +

    output_basename
    +

    (string) A string to use as a prefix for the names of the output CSV files of CmdStan. If NULL (the default), the basename of the output CSV files will be comprised from the model name, timestamp, and -5 random characters.

    epsilon

    (positive real) The finite difference step size. Default -value is 1e-6.

    error

    (positive real) The error threshold. Default value is 1e-6.

    +5 random characters.

    -

    Value

    -

    A CmdStanDiagnose object.

    -

    See also

    +
    epsilon
    +

    (positive real) The finite difference step size. Default +value is 1e-6.

    -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online -documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    -

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-compile, -model-method-format, -model-method-generate-quantities, -model-method-optimize, -model-method-sample_mpi, -model-method-sample, -model-method-variables, -model-method-variational

    +
    error
    +

    (positive real) The error threshold. Default value is 1e-6.

    -

    Examples

    -
    # \dontrun{ -test <- cmdstanr_example("logistic", method = "diagnose") +
    +
    +

    Value

    + -# retrieve the gradients -test$gradients() -
    #> param_idx value model finite_diff error -#> 1 0 1.3362100 -9.33442 -9.33442 2.88445e-08 -#> 2 1 1.7816500 -32.65590 -32.65590 -2.89809e-10 -#> 3 2 -0.0718337 -2.56116 -2.56116 -2.13148e-08 -#> 4 3 -1.9334500 29.16060 29.16060 -3.65847e-08
    # } +

    A CmdStanDiagnose object.

    +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +test <- cmdstanr_example("logistic", method = "diagnose")
    +
    +# retrieve the gradients
    +test$gradients()
    +#>   param_idx     value     model finite_diff        error
    +#> 1         0 -1.310990  28.37700    28.37700 -1.92695e-08
    +#> 2         1 -1.931430   8.55341     8.55341 -1.60934e-08
    +#> 3         2  0.751196 -12.17180   -12.17180 -2.15070e-08
    +#> 4         3 -1.808550  25.15020    25.15020  6.91254e-09
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-expose_functions.html b/docs/reference/model-method-expose_functions.html new file mode 100644 index 000000000..ad674e0e1 --- /dev/null +++ b/docs/reference/model-method-expose_functions.html @@ -0,0 +1,231 @@ + +Expose Stan functions to R — model-method-expose_functions • cmdstanr + + +
    +
    + + + +
    +
    + + +
    +

    The $expose_functions() method of a CmdStanModel object +will compile the functions in the Stan program's functions block and +expose them for use in R. This can also be specified via the +compile_standalone argument to the $compile() +method.

    +

    This method is also available for fitted model objects (CmdStanMCMC, CmdStanVB, etc.). +See Examples.

    +

    Note: there may be many compiler warnings emitted during compilation but +these can be ignored so long as they are warnings and not errors.

    +
    + +
    +
    expose_functions(global = FALSE, verbose = FALSE)
    +
    + +
    +

    Arguments

    +
    global
    +

    (logical) Should the functions be added to the Global +Environment? The default is FALSE, in which case the functions are +available via the functions field of the R6 object.

    + + +
    verbose
    +

    (logical) Should detailed information about generated code be +printed to the console? Defaults to FALSE.

    + +
    +
    +

    See also

    +

    The CmdStanR website +(mc-stan.org/cmdstanr) for online +documentation and tutorials.

    +

    The Stan and CmdStan documentation:

    Other CmdStanModel methods: +model-method-check_syntax, +model-method-compile, +model-method-diagnose, +model-method-format, +model-method-generate-quantities, +model-method-optimize, +model-method-sample_mpi, +model-method-sample, +model-method-variables, +model-method-variational

    +
    + +
    +

    Examples

    +
    # \dontrun{
    +stan_file <- write_stan_file(
    + "
    + functions {
    +   real a_plus_b(real a, real b) {
    +     return a + b;
    +   }
    + }
    + parameters {
    +   real x;
    + }
    + model {
    +   x ~ std_normal();
    + }
    + "
    +)
    +mod <- cmdstan_model(stan_file)
    +mod$expose_functions()
    +#> Compiling standalone functions...
    +mod$functions$a_plus_b(1, 2)
    +#> [1] 3
    +
    +fit <- mod$sample(refresh = 0)
    +#> Running MCMC with 4 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> Chain 3 finished in 0.0 seconds.
    +#> Chain 4 finished in 0.0 seconds.
    +#> 
    +#> All 4 chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.6 seconds.
    +#> 
    +fit$expose_functions() # already compiled because of above but this would compile them otherwise
    +#> Functions already compiled, nothing to do!
    +fit$functions$a_plus_b(1, 2)
    +#> [1] 3
    +# }
    +
    +
    +
    +
    +
    + +
    + + +
    + + + + + + + + diff --git a/docs/reference/model-method-format.html b/docs/reference/model-method-format.html index 9bb9f867e..041779776 100644 --- a/docs/reference/model-method-format.html +++ b/docs/reference/model-method-format.html @@ -1,77 +1,14 @@ - - - - - - - -Run stanc's auto-formatter on the model code. — model-method-format • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run stanc's auto-formatter on the model code. — model-method-format • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    -

    The $format() method of a CmdStanModel object +

    The $format() method of a CmdStanModel object runs stanc's auto-formatter on the model code. Either saves the formatted model directly back to the file or prints it for inspection.

    -
    format(
    -  overwrite_file = FALSE,
    -  canonicalize = FALSE,
    -  backup = TRUE,
    -  max_line_length = NULL,
    -  quiet = FALSE
    -)
    +
    +
    format(
    +  overwrite_file = FALSE,
    +  canonicalize = FALSE,
    +  backup = TRUE,
    +  max_line_length = NULL,
    +  quiet = FALSE
    +)
    +
    + +
    +

    Arguments

    +
    overwrite_file
    +

    (logical) Should the formatted code be written back +to the input model file. The default is FALSE.

    + -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    overwrite_file

    (logical) Should the formatted code be written back -to the input model file. The default is FALSE.

    canonicalize

    (list or logical) Defines whether or not the compiler +

    canonicalize
    +

    (list or logical) Defines whether or not the compiler should 'canonicalize' the Stan model, removing things like deprecated syntax. Default is FALSE. If TRUE, all canonicalizations are run. You can also supply a list of strings which represent options. In that case the options -are passed to stanc (new in Stan 2.29). See the User's guide section -for available canonicalization options.

    backup

    (logical) If TRUE, create stanfile.bak backups before +are passed to stanc (new in Stan 2.29). See the User's guide section +for available canonicalization options.

    + + +
    backup
    +

    (logical) If TRUE, create stanfile.bak backups before writing to the file. Disable this option if you're sure you have other copies of the file or are using a version control system like Git. Defaults -to TRUE. The value is ignored if overwrite_file = FALSE.

    max_line_length

    (integer) The maximum length of a line when formatting. -The default is NULL, which defers to the default line length of stanc.

    quiet

    (logical) Should informational messages be suppressed? The -default is FALSE.

    +to TRUE. The value is ignored if overwrite_file = FALSE.

    -

    Value

    -

    The $format() method returns TRUE (invisibly) if the model -is valid.

    -

    See also

    +
    max_line_length
    +

    (integer) The maximum length of a line when formatting. +The default is NULL, which defers to the default line length of stanc.

    -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online -documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    -

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-compile, -model-method-diagnose, -model-method-generate-quantities, -model-method-optimize, -model-method-sample_mpi, -model-method-sample, -model-method-variables, -model-method-variational

    +
    quiet
    +

    (logical) Should informational messages be suppressed? The +default is FALSE.

    + +
    +
    +

    Value

    + -

    Examples

    -
    # \dontrun{ -file <- write_stan_file(" -data { - int N; - int y[N]; -} -parameters { - real lambda; -} -model { - target += - poisson_log(y | lambda); -} -") -mod <- cmdstan_model(file, compile = FALSE) -mod$format(canonicalize = TRUE) -
    #> Semantic error in '/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/model_44779dbadddd7550b5164157b77f4d11.stan', line 11, column 1 to column 24: -#> ------------------------------------------------- -#> 9: model { -#> 10: target += -#> 11: poisson_log(y | lambda); -#> ^ -#> 12: } -#> 13: -#> ------------------------------------------------- -#> -#> Only functions with names ending in _lpdf, _lupdf, _lpmf, _lupmf, _cdf, _lcdf, _lccdf can make use of conditional notation.
    #> Error: Syntax error found! See the message above for more information.
    # } +

    The $format() method returns TRUE (invisibly) if the model +is valid.

    +
    +
    +

    See also

    + +
    -
    +
    +

    Examples

    +
    # \dontrun{
    +file <- write_stan_file("
    +data {
    +  int N;
    +  int y[N];
    +}
    +parameters {
    +  real                     lambda;
    +}
    +model {
    +  target +=
    + poisson_lpmf(y | lambda);
    +}
    +")
    +mod <- cmdstan_model(file, compile = FALSE)
    +mod$format(canonicalize = TRUE)
    +#> data {
    +#>   int N;
    +#>   array[N] int y;
    +#> }
    +#> parameters {
    +#>   real lambda;
    +#> }
    +#> model {
    +#>   target += poisson_lpmf(y | lambda);
    +#> }
    +#> 
    +#> 
    +# }
    +
    +
    +
    +
    -
    - - + + diff --git a/docs/reference/model-method-generate-quantities.html b/docs/reference/model-method-generate-quantities.html index 8e5b08ce5..63e57c29a 100644 --- a/docs/reference/model-method-generate-quantities.html +++ b/docs/reference/model-method-generate-quantities.html @@ -1,77 +1,14 @@ - - - - - - - -Run Stan's standalone generated quantities method — model-method-generate-quantities • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run Stan's standalone generated quantities method — model-method-generate-quantities • cmdstanr - - - - - - - - - - - + + - - - -
    -
    - -
    - -
    +
    -

    The $generate_quantities() method of a CmdStanModel object +

    The $generate_quantities() method of a CmdStanModel object runs Stan's standalone generated quantities to obtain generated quantities based on previously fitted parameters.

    -
    generate_quantities(
    -  fitted_params,
    -  data = NULL,
    -  seed = NULL,
    -  output_dir = NULL,
    -  output_basename = NULL,
    -  sig_figs = NULL,
    -  parallel_chains = getOption("mc.cores", 1),
    -  threads_per_chain = NULL,
    -  opencl_ids = NULL
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    fitted_params

    (multiple options) The parameter draws to use. One of -the following:

    +
    +
    generate_quantities(
    +  fitted_params,
    +  data = NULL,
    +  seed = NULL,
    +  output_dir = NULL,
    +  output_basename = NULL,
    +  sig_figs = NULL,
    +  parallel_chains = getOption("mc.cores", 1),
    +  threads_per_chain = NULL,
    +  opencl_ids = NULL
    +)
    +
    -

    NOTE: if you plan on making many calls to $generate_quantities() then the +

    +

    Arguments

    +
    fitted_params
    +

    (multiple options) The parameter draws to use. One of +the following:

    NOTE: if you plan on making many calls to $generate_quantities() then the most efficient option is to pass the paths of the CmdStan CSV output files (this avoids CmdStanR having to rewrite the draws contained in the fitted model object to CSV each time). If you no longer have the CSV files you can -use draws_to_csv() once to write them and then pass the resulting file -paths to $generate_quantities() as many times as needed.

    data

    (multiple options) The data to use for the variables specified in -the data block of the Stan program. One of the following:

      -
    • A named list of R objects with the names corresponding to variables +use draws_to_csv() once to write them and then pass the resulting file +paths to $generate_quantities() as many times as needed.

      + + +
      data
      +

      (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

      • A named list of R objects with the names corresponding to variables declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using write_stan_json(). See -write_stan_json() for details on the conversions performed on R objects +written to JSON for CmdStan using write_stan_json(). See +write_stan_json() for details on the conversions performed on R objects before they are passed to Stan.

      • A path to a data file compatible with CmdStan (JSON or R dump). See the appendices in the CmdStan guide for details on using these formats.

      • NULL or an empty list if the Stan program has no data block.

      • -
    seed

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. + + + +

    seed
    +

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single seed will automatically be augmented by the run (chain) ID so that each chain uses a different seed. The exception is the transformed data block, which defaults to @@ -239,198 +163,200 @@

    Arg chains if RNG functions are used. The only time seed should be specified as a vector (one element per chain) is if RNG functions are used in transformed data and the goal is to generate different data for each -chain.

    output_dir

    (string) A path to a directory where CmdStan should write +chain.

    + + +
    output_dir
    +

    (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at NULL (temporary directory) since CmdStanR makes the CmdStan output (posterior draws and diagnostics) available in R via methods of the fitted -model objects. The behavior of output_dir is as follows:

      -
    • If NULL (the default), then the CSV files are written to a temporary +model objects. The behavior of output_dir is as follows:

      • If NULL (the default), then the CSV files are written to a temporary directory and only saved permanently if the user calls one of the $save_* methods of the fitted model object (e.g., -$save_output_files()). These temporary +$save_output_files()). These temporary files are removed when the fitted model object is -garbage collected (manually or automatically).

      • +garbage collected (manually or automatically).

      • If a path, then the files are created in output_dir with names corresponding to the defaults used by $save_output_files().

      • -
    output_basename

    (string) A string to use as a prefix for the names of + + + +

    output_basename
    +

    (string) A string to use as a prefix for the names of the output CSV files of CmdStan. If NULL (the default), the basename of the output CSV files will be comprised from the model name, timestamp, and -5 random characters.

    sig_figs

    (positive integer) The number of significant figures used +5 random characters.

    + + +
    sig_figs
    +

    (positive integer) The number of significant figures used when storing the output values. By default, CmdStan represents the output values with 6 significant figures. The upper limit for sig_figs is 18. Increasing this value will result in larger output CSV files and thus an -increased usage of disk space.

    parallel_chains

    (positive integer) The maximum number of MCMC chains +increased usage of disk space.

    + + +
    parallel_chains
    +

    (positive integer) The maximum number of MCMC chains to run in parallel. If parallel_chains is not specified then the default is to look for the option "mc.cores", which can be set for an entire R -session by options(mc.cores=value). If the "mc.cores" option has not -been set then the default is 1.

    threads_per_chain

    (positive integer) If the model was -compiled with threading support, the number of +session by options(mc.cores=value). If the "mc.cores" option has not +been set then the default is 1.

    + + +
    threads_per_chain
    +

    (positive integer) If the model was +compiled with threading support, the number of threads to use in parallelized sections within an MCMC chain (e.g., when using the Stan functions reduce_sum() or map_rect()). This is in contrast with parallel_chains, which specifies the number of chains to run in parallel. The actual number of CPU cores used is parallel_chains*threads_per_chain. For an example of using threading see the Stan case study -Reduce Sum: A Minimal Example.

    opencl_ids

    (integer vector of length 2) The platform and +Reduce Sum: A Minimal Example.

    + + +
    opencl_ids
    +

    (integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must be compiled with cpp_options = list(stan_opencl = TRUE) for this -argument to have an effect.

    - -

    Value

    +argument to have an effect.

    -

    A CmdStanGQ object.

    -

    See also

    +
    +
    +

    Value

    + -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online +

    A CmdStanGQ object.

    +
    +
    +

    See also

    +

    The CmdStanR website +(mc-stan.org/cmdstanr) for online documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    - -

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-compile, -model-method-diagnose, -model-method-format, -model-method-optimize, -model-method-sample_mpi, -model-method-sample, -model-method-variables, -model-method-variational

    - -

    Examples

    -
    # \dontrun{ -# first fit a model using MCMC -mcmc_program <- write_stan_file( - "data { - int<lower=0> N; - int<lower=0,upper=1> y[N]; - } - parameters { - real<lower=0,upper=1> theta; - } - model { - y ~ bernoulli(theta); - }" -) -mod_mcmc <- cmdstan_model(mcmc_program) - -data <- list(N = 10, y = c(1,1,0,0,0,1,0,1,0,0)) -fit_mcmc <- mod_mcmc$sample(data = data, seed = 123, refresh = 0) -
    #> Running MCMC with 4 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> Chain 3 finished in 0.0 seconds. -#> Chain 4 finished in 0.0 seconds. -#> -#> All 4 chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.6 seconds. -#>
    -# stan program for standalone generated quantities -# (could keep model block, but not necessary so removing it) -gq_program <- write_stan_file( - "data { - int<lower=0> N; - int<lower=0,upper=1> y[N]; - } - parameters { - real<lower=0,upper=1> theta; - } - generated quantities { - int y_rep[N] = bernoulli_rng(rep_vector(theta, N)); - }" -) - -mod_gq <- cmdstan_model(gq_program) -fit_gq <- mod_gq$generate_quantities(fit_mcmc, data = data, seed = 123) -
    #> Running standalone generated quantities after 4 MCMC chains, 1 chain at a time ... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> Chain 3 finished in 0.0 seconds. -#> Chain 4 finished in 0.0 seconds. -#> -#> All 4 chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.5 seconds.
    str(fit_gq$draws()) -
    #> 'draws_array' int [1:1000, 1:4, 1:10] 0 0 0 0 1 0 1 0 0 0 ... -#> - attr(*, "dimnames")=List of 3 -#> ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... -#> ..$ chain : chr [1:4] "1" "2" "3" "4" -#> ..$ variable : chr [1:10] "y_rep[1]" "y_rep[2]" "y_rep[3]" "y_rep[4]" ...
    -library(posterior) -as_draws_df(fit_gq$draws()) -
    #> # A draws_df: 1000 iterations, 4 chains, and 10 variables -#> y_rep[1] y_rep[2] y_rep[3] y_rep[4] y_rep[5] y_rep[6] y_rep[7] y_rep[8] -#> 1 0 0 0 0 0 0 0 0 -#> 2 0 1 1 1 1 1 1 1 -#> 3 0 0 0 0 0 1 1 1 -#> 4 0 0 0 0 0 1 0 1 -#> 5 1 1 1 1 1 0 1 0 -#> 6 0 0 0 0 1 0 1 1 -#> 7 1 0 0 0 1 1 1 0 -#> 8 0 1 0 0 0 1 0 1 -#> 9 0 1 0 1 1 1 1 1 -#> 10 0 1 0 1 0 0 0 0 -#> # ... with 3990 more draws, and 2 more variables -#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    # } - -
    +

    The Stan and CmdStan documentation:

    Other CmdStanModel methods: +model-method-check_syntax, +model-method-compile, +model-method-diagnose, +model-method-expose_functions, +model-method-format, +model-method-optimize, +model-method-sample_mpi, +model-method-sample, +model-method-variables, +model-method-variational

    +
    + +
    +

    Examples

    +
    # \dontrun{
    +# first fit a model using MCMC
    +mcmc_program <- write_stan_file(
    +  "data {
    +    int<lower=0> N;
    +    int<lower=0,upper=1> y[N];
    +  }
    +  parameters {
    +    real<lower=0,upper=1> theta;
    +  }
    +  model {
    +    y ~ bernoulli(theta);
    +  }"
    +)
    +mod_mcmc <- cmdstan_model(mcmc_program)
    +
    +data <- list(N = 10, y = c(1,1,0,0,0,1,0,1,0,0))
    +fit_mcmc <- mod_mcmc$sample(data = data, seed = 123, refresh = 0)
    +#> Running MCMC with 4 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> Chain 3 finished in 0.0 seconds.
    +#> Chain 4 finished in 0.0 seconds.
    +#> 
    +#> All 4 chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.6 seconds.
    +#> 
    +
    +# stan program for standalone generated quantities
    +# (could keep model block, but not necessary so removing it)
    +gq_program <- write_stan_file(
    +  "data {
    +    int<lower=0> N;
    +    int<lower=0,upper=1> y[N];
    +  }
    +  parameters {
    +    real<lower=0,upper=1> theta;
    +  }
    +  generated quantities {
    +    int y_rep[N] = bernoulli_rng(rep_vector(theta, N));
    +  }"
    +)
    +
    +mod_gq <- cmdstan_model(gq_program)
    +fit_gq <- mod_gq$generate_quantities(fit_mcmc, data = data, seed = 123)
    +#> Running standalone generated quantities after 4 MCMC chains, 1 chain at a time ...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> Chain 3 finished in 0.0 seconds.
    +#> Chain 4 finished in 0.0 seconds.
    +#> 
    +#> All 4 chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.5 seconds.
    +str(fit_gq$draws())
    +#>  'draws_array' int [1:1000, 1:4, 1:10] 0 0 0 0 1 0 1 0 0 0 ...
    +#>  - attr(*, "dimnames")=List of 3
    +#>   ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
    +#>   ..$ chain    : chr [1:4] "1" "2" "3" "4"
    +#>   ..$ variable : chr [1:10] "y_rep[1]" "y_rep[2]" "y_rep[3]" "y_rep[4]" ...
    +
    +library(posterior)
    +as_draws_df(fit_gq$draws())
    +#> # A draws_df: 1000 iterations, 4 chains, and 10 variables
    +#>    y_rep[1] y_rep[2] y_rep[3] y_rep[4] y_rep[5] y_rep[6] y_rep[7] y_rep[8]
    +#> 1         0        0        0        0        0        0        0        0
    +#> 2         0        1        1        1        1        1        1        1
    +#> 3         0        0        0        0        0        1        1        1
    +#> 4         0        0        0        0        0        1        0        1
    +#> 5         1        1        1        1        1        0        1        0
    +#> 6         0        0        0        0        1        0        1        1
    +#> 7         1        0        0        0        1        1        1        0
    +#> 8         0        1        0        0        0        1        0        1
    +#> 9         0        1        0        1        1        1        1        1
    +#> 10        0        1        0        1        0        0        0        0
    +#> # ... with 3990 more draws, and 2 more variables
    +#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-optimize-1.png b/docs/reference/model-method-optimize-1.png index d7b8b97f8..27fd88217 100644 Binary files a/docs/reference/model-method-optimize-1.png and b/docs/reference/model-method-optimize-1.png differ diff --git a/docs/reference/model-method-optimize-2.png b/docs/reference/model-method-optimize-2.png index 093449f41..4ed9fe5c5 100644 Binary files a/docs/reference/model-method-optimize-2.png and b/docs/reference/model-method-optimize-2.png differ diff --git a/docs/reference/model-method-optimize.html b/docs/reference/model-method-optimize.html index ff22f0e99..b4d1ad67a 100644 --- a/docs/reference/model-method-optimize.html +++ b/docs/reference/model-method-optimize.html @@ -1,81 +1,18 @@ - - - - - - - -Run Stan's optimization algorithms — model-method-optimize • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run Stan's optimization algorithms — model-method-optimize • cmdstanr - - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +
    -

    The $optimize() method of a CmdStanModel object runs +

    The $optimize() method of a CmdStanModel object runs Stan's optimizer to obtain a posterior mode (penalized maximum likelihood) estimate.

    Any argument left as NULL will default to the default value used by the installed version of CmdStan. See the -CmdStan User’s Guide +CmdStan User’s Guide for more details.

    -
    optimize(
    -  data = NULL,
    -  seed = NULL,
    -  refresh = NULL,
    -  init = NULL,
    -  save_latent_dynamics = FALSE,
    -  output_dir = NULL,
    -  output_basename = NULL,
    -  sig_figs = NULL,
    -  threads = NULL,
    -  opencl_ids = NULL,
    -  algorithm = NULL,
    -  init_alpha = NULL,
    -  iter = NULL,
    -  tol_obj = NULL,
    -  tol_rel_obj = NULL,
    -  tol_grad = NULL,
    -  tol_rel_grad = NULL,
    -  tol_param = NULL,
    -  history_size = NULL
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    data

    (multiple options) The data to use for the variables specified in -the data block of the Stan program. One of the following:

      -
    • A named list of R objects with the names corresponding to variables +

      +
      optimize(
      +  data = NULL,
      +  seed = NULL,
      +  refresh = NULL,
      +  init = NULL,
      +  save_latent_dynamics = FALSE,
      +  output_dir = NULL,
      +  output_basename = NULL,
      +  sig_figs = NULL,
      +  threads = NULL,
      +  opencl_ids = NULL,
      +  algorithm = NULL,
      +  init_alpha = NULL,
      +  iter = NULL,
      +  tol_obj = NULL,
      +  tol_rel_obj = NULL,
      +  tol_grad = NULL,
      +  tol_rel_grad = NULL,
      +  tol_param = NULL,
      +  history_size = NULL
      +)
      +
      + +
      +

      Arguments

      +
      data
      +

      (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

      • A named list of R objects with the names corresponding to variables declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using write_stan_json(). See -write_stan_json() for details on the conversions performed on R objects +written to JSON for CmdStan using write_stan_json(). See +write_stan_json() for details on the conversions performed on R objects before they are passed to Stan.

      • A path to a data file compatible with CmdStan (JSON or R dump). See the appendices in the CmdStan guide for details on using these formats.

      • NULL or an empty list if the Stan program has no data block.

      • -
    seed

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. + + + +

    seed
    +

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single seed will automatically be augmented by the run (chain) ID so that each chain uses a different seed. The exception is the transformed data block, which defaults to @@ -240,25 +167,24 @@

    Arg chains if RNG functions are used. The only time seed should be specified as a vector (one element per chain) is if RNG functions are used in transformed data and the goal is to generate different data for each -chain.

    refresh

    (non-negative integer) The number of iterations between +chain.

    + + +
    refresh
    +

    (non-negative integer) The number of iterations between printed screen updates. If refresh = 0, only error messages will be -printed.

    init

    (multiple options) The initialization method to use for the +printed.

    + + +
    init
    +

    (multiple options) The initialization method to use for the variables declared in the parameters block of the Stan program. One of -the following:

      -
    • A real number x>0. This initializes all parameters randomly between +the following:

      • A real number x>0. This initializes all parameters randomly between [-x,x] on the unconstrained parameter space;

      • The number 0. This initializes all parameters to 0;

      • A character vector of paths (one per chain) to JSON or Rdump files containing initial values for all or some parameters. See -write_stan_json() to write R objects to JSON files compatible with +write_stan_json() to write R objects to JSON files compatible with CmdStan.

      • A list of lists containing initial values for all or some parameters. For MCMC the list should contain a sublist for each chain. For optimization and @@ -271,494 +197,515 @@

        Arg has argument chain_id it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See Examples.

      • -
    save_latent_dynamics

    (logical) Should auxiliary diagnostic information + + + +

    save_latent_dynamics
    +

    (logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's diagnostic_file argument and the content written to CSV is controlled by the user's CmdStan installation and not CmdStanR (for some algorithms no content may be written). The default is FALSE, which is appropriate for almost every use case. To save the temporary files created when save_latent_dynamics=TRUE see the -$save_latent_dynamics_files() -method.

    output_dir

    (string) A path to a directory where CmdStan should write +$save_latent_dynamics_files() +method.

    + + +
    output_dir
    +

    (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at NULL (temporary directory) since CmdStanR makes the CmdStan output (posterior draws and diagnostics) available in R via methods of the fitted -model objects. The behavior of output_dir is as follows:

      -
    • If NULL (the default), then the CSV files are written to a temporary +model objects. The behavior of output_dir is as follows:

      • If NULL (the default), then the CSV files are written to a temporary directory and only saved permanently if the user calls one of the $save_* methods of the fitted model object (e.g., -$save_output_files()). These temporary +$save_output_files()). These temporary files are removed when the fitted model object is -garbage collected (manually or automatically).

      • +garbage collected (manually or automatically).

      • If a path, then the files are created in output_dir with names corresponding to the defaults used by $save_output_files().

      • -
    output_basename

    (string) A string to use as a prefix for the names of + + + +

    output_basename
    +

    (string) A string to use as a prefix for the names of the output CSV files of CmdStan. If NULL (the default), the basename of the output CSV files will be comprised from the model name, timestamp, and -5 random characters.

    sig_figs

    (positive integer) The number of significant figures used +5 random characters.

    + + +
    sig_figs
    +

    (positive integer) The number of significant figures used when storing the output values. By default, CmdStan represents the output values with 6 significant figures. The upper limit for sig_figs is 18. Increasing this value will result in larger output CSV files and thus an -increased usage of disk space.

    threads

    (positive integer) If the model was -compiled with threading support, the number of +increased usage of disk space.

    + + +
    threads
    +

    (positive integer) If the model was +compiled with threading support, the number of threads to use in parallelized sections (e.g., when -using the Stan functions reduce_sum() or map_rect()).

    opencl_ids

    (integer vector of length 2) The platform and +using the Stan functions reduce_sum() or map_rect()).

    + + +
    opencl_ids
    +

    (integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must be compiled with cpp_options = list(stan_opencl = TRUE) for this -argument to have an effect.

    algorithm

    (string) The optimization algorithm. One of "lbfgs", +argument to have an effect.

    + + +
    algorithm
    +

    (string) The optimization algorithm. One of "lbfgs", "bfgs", or "newton". The control parameters below are only available for "lbfgs" and "bfgs". For their default values and more details see the CmdStan User's Guide. The default values can also be obtained by -running cmdstanr_example(method="optimize")$metadata().

    init_alpha

    (positive real) The initial step size parameter.

    iter

    (positive integer) The maximum number of iterations.

    tol_obj

    (positive real) Convergence tolerance on changes in objective function value.

    tol_rel_obj

    (positive real) Convergence tolerance on relative changes in objective function value.

    tol_grad

    (positive real) Convergence tolerance on the norm of the gradient.

    tol_rel_grad

    (positive real) Convergence tolerance on the relative norm of the gradient.

    tol_param

    (positive real) Convergence tolerance on changes in parameter value.

    history_size

    (positive integer) The size of the history used when -approximating the Hessian. Only available for L-BFGS.

    - -

    Value

    - -

    A CmdStanMLE object.

    -

    Details

    +running cmdstanr_example(method="optimize")$metadata().

    + + +
    init_alpha
    +

    (positive real) The initial step size parameter.

    + + +
    iter
    +

    (positive integer) The maximum number of iterations.

    + + +
    tol_obj
    +

    (positive real) Convergence tolerance on changes in objective function value.

    + +
    tol_rel_obj
    +

    (positive real) Convergence tolerance on relative changes in objective function value.

    + + +
    tol_grad
    +

    (positive real) Convergence tolerance on the norm of the gradient.

    + + +
    tol_rel_grad
    +

    (positive real) Convergence tolerance on the relative norm of the gradient.

    + + +
    tol_param
    +

    (positive real) Convergence tolerance on changes in parameter value.

    + + +
    history_size
    +

    (positive integer) The size of the history used when +approximating the Hessian. Only available for L-BFGS.

    + +
    +
    +

    Value

    + + +

    A CmdStanMLE object.

    +
    +
    +

    Details

    CmdStan can find the posterior mode (assuming there is one). If the posterior is not convex, there is no guarantee Stan will be able to find the global mode as opposed to a local optimum of log probability. For optimization, the mode is calculated without the Jacobian adjustment for constrained variables, which shifts the mode due to the change of variables. Thus modes correspond to modes of the model as written.

    -

    -- CmdStan User's Guide

    -

    See also

    - -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online +

    -- CmdStan User's Guide

    +
    +
    +

    See also

    +

    The CmdStanR website +(mc-stan.org/cmdstanr) for online documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    - -

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-compile, -model-method-diagnose, -model-method-format, -model-method-generate-quantities, -model-method-sample_mpi, -model-method-sample, -model-method-variables, -model-method-variational

    - -

    Examples

    -
    # \dontrun{ -library(cmdstanr) -library(posterior) -library(bayesplot) -color_scheme_set("brightblue") - -# Set path to CmdStan -# (Note: if you installed CmdStan via install_cmdstan() with default settings -# then setting the path is unnecessary but the default below should still work. -# Otherwise use the `path` argument to specify the location of your -# CmdStan installation.) -set_cmdstan_path(path = NULL) -
    #> CmdStan path set to: /Users/jgabry/.cmdstan/cmdstan-2.29.1
    -# Create a CmdStanModel object from a Stan program, -# here using the example model that comes with CmdStan -file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan") -mod <- cmdstan_model(file) -mod$print() -
    #> data { -#> int<lower=0> N; -#> array[N] int<lower=0,upper=1> y; // or int<lower=0,upper=1> y[N]; -#> } -#> parameters { -#> real<lower=0,upper=1> theta; -#> } -#> model { -#> theta ~ beta(1,1); // uniform prior on interval 0,1 -#> y ~ bernoulli(theta); -#> }
    -# Data as a named list (like RStan) -stan_data <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) - -# Run MCMC using the 'sample' method -fit_mcmc <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - parallel_chains = 2 -) -
    #> Running MCMC with 2 parallel chains... -#> -#> Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup) -#> Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup) -#> Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup) -#> Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup) -#> Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup) -#> Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup) -#> Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup) -#> Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup) -#> Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup) -#> Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup) -#> Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup) -#> Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling) -#> Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling) -#> Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling) -#> Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling) -#> Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling) -#> Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling) -#> Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling) -#> Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling) -#> Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling) -#> Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling) -#> Chain 1 Iteration: 2000 / 2000 [100%] (Sampling) -#> Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup) -#> Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup) -#> Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup) -#> Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup) -#> Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup) -#> Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup) -#> Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup) -#> Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup) -#> Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup) -#> Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup) -#> Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup) -#> Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling) -#> Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling) -#> Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling) -#> Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling) -#> Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling) -#> 
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling) -#> Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling) -#> Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling) -#> Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling) -#> Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling) -#> Chain 2 Iteration: 2000 / 2000 [100%] (Sampling) -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.2 seconds. -#>
    -# Use 'posterior' package for summaries -fit_mcmc$summary() -
    #> # A tibble: 2 × 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -7.30 -7.03 0.721 0.380 -8.82 -6.75 1.00 902. 1006. -#> 2 theta 0.247 0.233 0.122 0.129 0.0786 0.470 1.00 762. 712.
    -# Get posterior draws -draws <- fit_mcmc$draws() -print(draws) -
    #> # A draws_array: 1000 iterations, 2 chains, and 2 variables -#> , , variable = lp__ -#> -#> chain -#> iteration 1 2 -#> 1 -6.8 -6.8 -#> 2 -6.9 -6.8 -#> 3 -7.0 -7.0 -#> 4 -6.9 -7.1 -#> 5 -6.7 -7.0 -#> -#> , , variable = theta -#> -#> chain -#> iteration 1 2 -#> 1 0.28 0.21 -#> 2 0.19 0.20 -#> 3 0.16 0.17 -#> 4 0.20 0.36 -#> 5 0.25 0.34 -#> -#> # ... with 995 more iterations
    -# Convert to data frame using posterior::as_draws_df -as_draws_df(draws) -
    #> # A draws_df: 1000 iterations, 2 chains, and 2 variables -#> lp__ theta -#> 1 -6.8 0.28 -#> 2 -6.9 0.19 -#> 3 -7.0 0.16 -#> 4 -6.9 0.20 -#> 5 -6.7 0.25 -#> 6 -7.1 0.36 -#> 7 -9.0 0.55 -#> 8 -7.2 0.15 -#> 9 -6.8 0.23 -#> 10 -7.5 0.42 -#> # ... with 1990 more draws -#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    -# Plot posterior using bayesplot (ggplot2) -mcmc_hist(fit_mcmc$draws("theta")) -
    #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    -# Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose() -
    #> Processing csv files: /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/bernoulli-202203181226-1-73471f.csv, /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/bernoulli-202203181226-2-73471f.csv -#> -#> Checking sampler transitions treedepth. -#> Treedepth satisfactory for all transitions. -#> -#> Checking sampler transitions for divergences. -#> No divergent transitions found. -#> -#> Checking E-BFMI - sampler transitions HMC potential energy. -#> E-BFMI satisfactory. -#> -#> Effective sample size satisfactory. -#> -#> Split R-hat values satisfactory all parameters. -#> -#> Processing complete, no problems detected.
    fit_mcmc$cmdstan_summary() -
    #> Inference for Stan model: bernoulli_model -#> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. -#> -#> Warmup took (0.0050, 0.0050) seconds, 0.010 seconds total -#> Sampling took (0.015, 0.014) seconds, 0.029 seconds total -#> -#> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat -#> -#> lp__ -7.3 2.6e-02 0.72 -8.8 -7.0 -6.8 781 26932 1.0 -#> accept_stat__ 0.92 8.3e-03 0.13 0.64 0.97 1.0 2.3e+02 8.1e+03 1.0e+00 -#> stepsize__ 0.95 7.9e-02 0.079 0.87 1.0 1.0 1.0e+00 3.5e+01 2.0e+13 -#> treedepth__ 1.4 1.1e-02 0.48 1.0 1.0 2.0 1.9e+03 6.5e+04 1.0e+00 -#> n_leapfrog__ 2.5 1.4e-01 1.3 1.0 3.0 3.0 8.9e+01 3.1e+03 1.0e+00 -#> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 3.6e-02 1.00 6.8 7.5 9.6 7.7e+02 2.7e+04 1.0e+00 -#> -#> theta 0.25 4.3e-03 0.12 0.079 0.23 0.47 796 27460 1.0 -#> -#> Samples were drawn using hmc with nuts. -#> For each parameter, N_Eff is a crude measure of effective sample size, -#> and R_hat is the potential scale reduction factor on split chains (at -#> convergence, R_hat=1).
    -# For models fit using MCMC, if you like working with RStan's stanfit objects -# then you can create one with rstan::read_stan_csv() - -# stanfit <- rstan::read_stan_csv(fit_mcmc$output_files()) - - -# Run 'optimize' method to get a point estimate (default is Stan's LBFGS algorithm) -# and also demonstrate specifying data as a path to a file instead of a list -my_data_file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.data.json") -fit_optim <- mod$optimize(data = my_data_file, seed = 123) -
    #> Initial log joint probability = -9.51104 -#> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 6 -5.00402 0.000103557 2.55661e-07 1 1 9 -#> Optimization terminated normally: -#> Convergence detected: relative gradient magnitude is below tolerance -#> Finished in 0.1 seconds.
    -fit_optim$summary() -
    #> # A tibble: 2 × 2 -#> variable estimate -#> <chr> <dbl> -#> 1 lp__ -5.00 -#> 2 theta 0.2
    - -# Run 'variational' method to approximate the posterior (default is meanfield ADVI) -fit_vb <- mod$variational(data = stan_data, seed = 123) -
    #> ------------------------------------------------------------ -#> EXPERIMENTAL ALGORITHM: -#> This procedure has not been thoroughly tested and may be unstable -#> or buggy. The interface is subject to change. -#> ------------------------------------------------------------ -#> Gradient evaluation took 9e-06 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds. -#> Adjust your expectations accordingly! -#> Begin eta adaptation. -#> Iteration: 1 / 250 [ 0%] (Adaptation) -#> Iteration: 50 / 250 [ 20%] (Adaptation) -#> Iteration: 100 / 250 [ 40%] (Adaptation) -#> Iteration: 150 / 250 [ 60%] (Adaptation) -#> Iteration: 200 / 250 [ 80%] (Adaptation) -#> Success! Found best value [eta = 1] earlier than expected. -#> Begin stochastic gradient ascent. -#> iter ELBO delta_ELBO_mean delta_ELBO_med notes -#> 100 -6.262 1.000 1.000 -#> 200 -6.263 0.500 1.000 -#> 300 -6.307 0.336 0.007 MEDIAN ELBO CONVERGED -#> Drawing a sample of size 1000 from the approximate posterior... -#> COMPLETED. -#> Finished in 0.1 seconds.
    -fit_vb$summary() -
    #> # A tibble: 3 × 7 -#> variable mean median sd mad q5 q95 -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -7.18 -6.94 0.588 0.259 -8.36 -6.75 -#> 2 lp_approx__ -0.515 -0.221 0.692 0.303 -2.06 -0.00257 -#> 3 theta 0.263 0.246 0.115 0.113 0.106 0.481
    -# Plot approximate posterior using bayesplot -mcmc_hist(fit_vb$draws("theta")) -
    #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    - -# Specifying initial values as a function -fit_mcmc_w_init_fun <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = function() list(theta = runif(1)) -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_mcmc_w_init_fun_2 <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = function(chain_id) { - # silly but demonstrates optional use of chain_id - list(theta = 1 / (chain_id + 1)) - } -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_mcmc_w_init_fun_2$init() -
    #> [[1]] -#> [[1]]$theta -#> [1] 0.5 -#> -#> -#> [[2]] -#> [[2]]$theta -#> [1] 0.3333333 -#> -#>
    -# Specifying initial values as a list of lists -fit_mcmc_w_init_list <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = list( - list(theta = 0.75), # chain 1 - list(theta = 0.25) # chain 2 - ) -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.5 seconds. -#>
    fit_optim_w_init_list <- mod$optimize( - data = stan_data, - seed = 123, - init = list( - list(theta = 0.75) - ) -) -
    #> Initial log joint probability = -11.6657 -#> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 6 -5.00402 0.000237915 9.55309e-07 1 1 9 -#> Optimization terminated normally: -#> Convergence detected: relative gradient magnitude is below tolerance -#> Finished in 0.1 seconds.
    fit_optim_w_init_list$init() -
    #> [[1]] -#> [[1]]$theta -#> [1] 0.75 -#> -#>
    # } - -
    +

    The Stan and CmdStan documentation:

    Other CmdStanModel methods: +model-method-check_syntax, +model-method-compile, +model-method-diagnose, +model-method-expose_functions, +model-method-format, +model-method-generate-quantities, +model-method-sample_mpi, +model-method-sample, +model-method-variables, +model-method-variational

    +
    + +
    +

    Examples

    +
    # \dontrun{
    +library(cmdstanr)
    +library(posterior)
    +library(bayesplot)
    +color_scheme_set("brightblue")
    +
    +# Set path to CmdStan
    +# (Note: if you installed CmdStan via install_cmdstan() with default settings
    +# then setting the path is unnecessary but the default below should still work.
    +# Otherwise use the `path` argument to specify the location of your
    +# CmdStan installation.)
    +set_cmdstan_path(path = NULL)
    +#> CmdStan path set to: /Users/jgabry/.cmdstan/cmdstan-2.32.2
    +
    +# Create a CmdStanModel object from a Stan program,
    +# here using the example model that comes with CmdStan
    +file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan")
    +mod <- cmdstan_model(file)
    +mod$print()
    +#> data {
    +#>   int<lower=0> N;
    +#>   array[N] int<lower=0,upper=1> y;
    +#> }
    +#> parameters {
    +#>   real<lower=0,upper=1> theta;
    +#> }
    +#> model {
    +#>   theta ~ beta(1,1);  // uniform prior on interval 0,1
    +#>   y ~ bernoulli(theta);
    +#> }
    +
    +# Data as a named list (like RStan)
    +stan_data <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1))
    +
    +# Run MCMC using the 'sample' method
    +fit_mcmc <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  parallel_chains = 2
    +)
    +#> Running MCMC with 2 parallel chains...
    +#> 
    +#> Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
    +#> Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
    +#> Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
    +#> Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
    +#> Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
    +#> Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
    +#> Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
    +#> Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
    +#> Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
    +#> Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
    +#> Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
    +#> Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
    +#> Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
    +#> Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
    +#> Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
    +#> Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
    +#> Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
    +#> Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
    +#> Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
    +#> Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
    +#> Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
    +#> Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
    +#> Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup) 
    +#> Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
    +#> Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
    +#> Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
    +#> Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
    +#> Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
    +#> Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
    +#> Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
    +#> Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
    +#> Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
    +#> Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
    +#> Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
    +#> Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
    +#> Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
    +#> Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
    +#> Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
    +#> Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
    +#> Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
    +#> Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
    +#> Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
    +#> Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
    +#> Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.2 seconds.
    +#> 
    +
    +# Use 'posterior' package for summaries
    +fit_mcmc$summary()
    +#> # A tibble: 2 × 10
    +#>   variable   mean median    sd   mad      q5    q95  rhat ess_bulk ess_tail
    +#>   <chr>     <num>  <num> <num> <num>   <num>  <num> <num>    <num>    <num>
    +#> 1 lp__     -7.30  -7.03  0.721 0.380 -8.82   -6.75   1.00     902.    1006.
    +#> 2 theta     0.247  0.233 0.122 0.129  0.0786  0.470  1.00     762.     712.
    +
    +# Get posterior draws
    +draws <- fit_mcmc$draws()
    +print(draws)
    +#> # A draws_array: 1000 iterations, 2 chains, and 2 variables
    +#> , , variable = lp__
    +#> 
    +#>          chain
    +#> iteration    1    2
    +#>         1 -6.8 -6.8
    +#>         2 -6.9 -6.8
    +#>         3 -7.0 -7.0
    +#>         4 -6.9 -7.1
    +#>         5 -6.7 -7.0
    +#> 
    +#> , , variable = theta
    +#> 
    +#>          chain
    +#> iteration    1    2
    +#>         1 0.28 0.21
    +#>         2 0.19 0.20
    +#>         3 0.16 0.17
    +#>         4 0.20 0.36
    +#>         5 0.25 0.34
    +#> 
    +#> # ... with 995 more iterations
    +
    +# Convert to data frame using posterior::as_draws_df
    +as_draws_df(draws)
    +#> # A draws_df: 1000 iterations, 2 chains, and 2 variables
    +#>    lp__ theta
    +#> 1  -6.8  0.28
    +#> 2  -6.9  0.19
    +#> 3  -7.0  0.16
    +#> 4  -6.9  0.20
    +#> 5  -6.7  0.25
    +#> 6  -7.1  0.36
    +#> 7  -9.0  0.55
    +#> 8  -7.2  0.15
    +#> 9  -6.8  0.23
    +#> 10 -7.5  0.42
    +#> # ... with 1990 more draws
    +#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    +
    +# Plot posterior using bayesplot (ggplot2)
    +mcmc_hist(fit_mcmc$draws("theta"))
    +#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    +
    +
    +# Call CmdStan's diagnose and stansummary utilities
    +fit_mcmc$cmdstan_diagnose()
    +#> Processing csv files: /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/bernoulli-202307251438-1-65b170.csv, /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/bernoulli-202307251438-2-65b170.csv
    +#> 
    +#> Checking sampler transitions treedepth.
    +#> Treedepth satisfactory for all transitions.
    +#> 
    +#> Checking sampler transitions for divergences.
    +#> No divergent transitions found.
    +#> 
    +#> Checking E-BFMI - sampler transitions HMC potential energy.
    +#> E-BFMI satisfactory.
    +#> 
    +#> Effective sample size satisfactory.
    +#> 
    +#> Split R-hat values satisfactory all parameters.
    +#> 
    +#> Processing complete, no problems detected.
    +fit_mcmc$cmdstan_summary()
    +#> Inference for Stan model: bernoulli_model
    +#> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved.
    +#> 
    +#> Warmup took (0.0040, 0.0040) seconds, 0.0080 seconds total
    +#> Sampling took (0.011, 0.011) seconds, 0.022 seconds total
    +#> 
    +#>                 Mean     MCSE  StdDev     5%   50%   95%  N_Eff  N_Eff/s    R_hat
    +#> 
    +#> lp__            -7.3  2.6e-02    0.72   -8.8  -7.0  -6.8    781    35502      1.0
    +#> accept_stat__   0.92  8.3e-03    0.13   0.64  0.97   1.0    235    10662  1.0e+00
    +#> stepsize__      0.95  7.9e-02   0.079   0.87   1.0   1.0    1.0       46  2.0e+13
    +#> treedepth__      1.4  1.1e-02    0.48    1.0   1.0   2.0   1874    85179  1.0e+00
    +#> n_leapfrog__     2.5  1.4e-01     1.3    1.0   3.0   3.0     89     4050  1.0e+00
    +#> divergent__     0.00      nan    0.00   0.00  0.00  0.00    nan      nan      nan
    +#> energy__         7.8  3.6e-02    1.00    6.8   7.5   9.6    775    35215  1.0e+00
    +#> 
    +#> theta           0.25  4.3e-03    0.12  0.079  0.23  0.47    796    36197      1.0
    +#> 
    +#> Samples were drawn using hmc with nuts.
    +#> For each parameter, N_Eff is a crude measure of effective sample size,
    +#> and R_hat is the potential scale reduction factor on split chains (at 
    +#> convergence, R_hat=1).
    +
    +# For models fit using MCMC, if you like working with RStan's stanfit objects
    +# then you can create one with rstan::read_stan_csv()
    +
    +# stanfit <- rstan::read_stan_csv(fit_mcmc$output_files())
    +
    +
    +# Run 'optimize' method to get a point estimate (default is Stan's LBFGS algorithm)
    +# and also demonstrate specifying data as a path to a file instead of a list
    +my_data_file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.data.json")
    +fit_optim <- mod$optimize(data = my_data_file, seed = 123)
    +#> Initial log joint probability = -9.51104 
    +#>     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes  
    +#>        6      -5.00402   0.000103557   2.55661e-07           1           1        9    
    +#> Optimization terminated normally:  
    +#>   Convergence detected: relative gradient magnitude is below tolerance 
    +#> Finished in  0.1 seconds.
    +
    +fit_optim$summary()
    +#> # A tibble: 2 × 2
    +#>   variable estimate
    +#>   <chr>       <num>
    +#> 1 lp__        -5.00
    +#> 2 theta        0.2 
    +
    +
    +# Run 'variational' method to approximate the posterior (default is meanfield ADVI)
    +fit_vb <- mod$variational(data = stan_data, seed = 123)
    +#> ------------------------------------------------------------ 
    +#> EXPERIMENTAL ALGORITHM: 
    +#>   This procedure has not been thoroughly tested and may be unstable 
    +#>   or buggy. The interface is subject to change. 
    +#> ------------------------------------------------------------ 
    +#> Gradient evaluation took 8e-06 seconds 
    +#> 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds. 
    +#> Adjust your expectations accordingly! 
    +#> Begin eta adaptation. 
    +#> Iteration:   1 / 250 [  0%]  (Adaptation) 
    +#> Iteration:  50 / 250 [ 20%]  (Adaptation) 
    +#> Iteration: 100 / 250 [ 40%]  (Adaptation) 
    +#> Iteration: 150 / 250 [ 60%]  (Adaptation) 
    +#> Iteration: 200 / 250 [ 80%]  (Adaptation) 
    +#> Success! Found best value [eta = 1] earlier than expected. 
    +#> Begin stochastic gradient ascent. 
    +#>   iter             ELBO   delta_ELBO_mean   delta_ELBO_med   notes  
    +#>    100           -6.262             1.000            1.000 
    +#>    200           -6.263             0.500            1.000 
    +#>    300           -6.307             0.336            0.007   MEDIAN ELBO CONVERGED 
    +#> Drawing a sample of size 1000 from the approximate posterior...  
    +#> COMPLETED. 
    +#> Finished in  0.1 seconds.
    +
    +fit_vb$summary()
    +#> # A tibble: 3 × 7
    +#>   variable      mean median    sd   mad     q5      q95
    +#>   <chr>        <num>  <num> <num> <num>  <num>    <num>
    +#> 1 lp__        -7.18  -6.94  0.588 0.259 -8.36  -6.75   
    +#> 2 lp_approx__ -0.515 -0.221 0.692 0.303 -2.06  -0.00257
    +#> 3 theta        0.263  0.246 0.115 0.113  0.106  0.481  
    +
    +# Plot approximate posterior using bayesplot
    +mcmc_hist(fit_vb$draws("theta"))
    +#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    +
    +
    +
    +# Specifying initial values as a function
    +fit_mcmc_w_init_fun <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = function() list(theta = runif(1))
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_mcmc_w_init_fun_2 <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = function(chain_id) {
    +    # silly but demonstrates optional use of chain_id
    +    list(theta = 1 / (chain_id + 1))
    +  }
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_mcmc_w_init_fun_2$init()
    +#> [[1]]
    +#> [[1]]$theta
    +#> [1] 0.5
    +#> 
    +#> 
    +#> [[2]]
    +#> [[2]]$theta
    +#> [1] 0.3333333
    +#> 
    +#> 
    +
    +# Specifying initial values as a list of lists
    +fit_mcmc_w_init_list <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = list(
    +    list(theta = 0.75), # chain 1
    +    list(theta = 0.25)  # chain 2
    +  )
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_optim_w_init_list <- mod$optimize(
    +  data = stan_data,
    +  seed = 123,
    +  init = list(
    +    list(theta = 0.75)
    +  )
    +)
    +#> Initial log joint probability = -11.6657 
    +#>     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes  
    +#>        6      -5.00402   0.000237915   9.55309e-07           1           1        9    
    +#> Optimization terminated normally:  
    +#>   Convergence detected: relative gradient magnitude is below tolerance 
    +#> Finished in  0.1 seconds.
    +fit_optim_w_init_list$init()
    +#> [[1]]
    +#> [[1]]$theta
    +#> [1] 0.75
    +#> 
    +#> 
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-sample-1.png b/docs/reference/model-method-sample-1.png index d7b8b97f8..27fd88217 100644 Binary files a/docs/reference/model-method-sample-1.png and b/docs/reference/model-method-sample-1.png differ diff --git a/docs/reference/model-method-sample-2.png b/docs/reference/model-method-sample-2.png index 093449f41..4ed9fe5c5 100644 Binary files a/docs/reference/model-method-sample-2.png and b/docs/reference/model-method-sample-2.png differ diff --git a/docs/reference/model-method-sample.html b/docs/reference/model-method-sample.html index d84264098..f656c23a7 100644 --- a/docs/reference/model-method-sample.html +++ b/docs/reference/model-method-sample.html @@ -1,82 +1,19 @@ - - - - - - - -Run Stan's MCMC algorithms — model-method-sample • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run Stan's MCMC algorithms — model-method-sample • cmdstanr - - - - - - - - - - - - + + - - -
    -
    - -
    - -
    +
    -

    The $sample() method of a CmdStanModel object runs Stan's +

    The $sample() method of a CmdStanModel object runs Stan's main Markov chain Monte Carlo algorithm.

    Any argument left as NULL will default to the default value used by the installed version of CmdStan. See the -CmdStan User’s Guide +CmdStan User’s Guide for more details.

    After model fitting any diagnostics specified via the diagnostics argument will be checked and warnings will be printed if warranted.

    -
    sample(
    -  data = NULL,
    -  seed = NULL,
    -  refresh = NULL,
    -  init = NULL,
    -  save_latent_dynamics = FALSE,
    -  output_dir = NULL,
    -  output_basename = NULL,
    -  sig_figs = NULL,
    -  chains = 4,
    -  parallel_chains = getOption("mc.cores", 1),
    -  chain_ids = seq_len(chains),
    -  threads_per_chain = NULL,
    -  opencl_ids = NULL,
    -  iter_warmup = NULL,
    -  iter_sampling = NULL,
    -  save_warmup = FALSE,
    -  thin = NULL,
    -  max_treedepth = NULL,
    -  adapt_engaged = TRUE,
    -  adapt_delta = NULL,
    -  step_size = NULL,
    -  metric = NULL,
    -  metric_file = NULL,
    -  inv_metric = NULL,
    -  init_buffer = NULL,
    -  term_buffer = NULL,
    -  window = NULL,
    -  fixed_param = FALSE,
    -  show_messages = TRUE,
    -  diagnostics = c("divergences", "treedepth", "ebfmi"),
    -  cores = NULL,
    -  num_cores = NULL,
    -  num_chains = NULL,
    -  num_warmup = NULL,
    -  num_samples = NULL,
    -  validate_csv = NULL,
    -  save_extra_diagnostics = NULL,
    -  max_depth = NULL,
    -  stepsize = NULL
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    data

    (multiple options) The data to use for the variables specified in -the data block of the Stan program. One of the following:

      -
    • A named list of R objects with the names corresponding to variables +

      +
      sample(
      +  data = NULL,
      +  seed = NULL,
      +  refresh = NULL,
      +  init = NULL,
      +  save_latent_dynamics = FALSE,
      +  output_dir = NULL,
      +  output_basename = NULL,
      +  sig_figs = NULL,
      +  chains = 4,
      +  parallel_chains = getOption("mc.cores", 1),
      +  chain_ids = seq_len(chains),
      +  threads_per_chain = NULL,
      +  opencl_ids = NULL,
      +  iter_warmup = NULL,
      +  iter_sampling = NULL,
      +  save_warmup = FALSE,
      +  thin = NULL,
      +  max_treedepth = NULL,
      +  adapt_engaged = TRUE,
      +  adapt_delta = NULL,
      +  step_size = NULL,
      +  metric = NULL,
      +  metric_file = NULL,
      +  inv_metric = NULL,
      +  init_buffer = NULL,
      +  term_buffer = NULL,
      +  window = NULL,
      +  fixed_param = FALSE,
      +  show_messages = TRUE,
      +  show_exceptions = TRUE,
      +  diagnostics = c("divergences", "treedepth", "ebfmi"),
      +  cores = NULL,
      +  num_cores = NULL,
      +  num_chains = NULL,
      +  num_warmup = NULL,
      +  num_samples = NULL,
      +  validate_csv = NULL,
      +  save_extra_diagnostics = NULL,
      +  max_depth = NULL,
      +  stepsize = NULL
      +)
      +
      + +
      +

      Arguments

      +
      data
      +

      (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

      • A named list of R objects with the names corresponding to variables declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using write_stan_json(). See -write_stan_json() for details on the conversions performed on R objects +written to JSON for CmdStan using write_stan_json(). See +write_stan_json() for details on the conversions performed on R objects before they are passed to Stan.

      • A path to a data file compatible with CmdStan (JSON or R dump). See the appendices in the CmdStan guide for details on using these formats.

      • NULL or an empty list if the Stan program has no data block.

      • -
    seed

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. + + + +

    seed
    +

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single seed will automatically be augmented by the run (chain) ID so that each chain uses a different seed. The exception is the transformed data block, which defaults to @@ -262,25 +190,24 @@

    Arg chains if RNG functions are used. The only time seed should be specified as a vector (one element per chain) is if RNG functions are used in transformed data and the goal is to generate different data for each -chain.

    refresh

    (non-negative integer) The number of iterations between +chain.

    + + +
    refresh
    +

    (non-negative integer) The number of iterations between printed screen updates. If refresh = 0, only error messages will be -printed.

    init

    (multiple options) The initialization method to use for the +printed.

    + + +
    init
    +

    (multiple options) The initialization method to use for the variables declared in the parameters block of the Stan program. One of -the following:

      -
    • A real number x>0. This initializes all parameters randomly between +the following:

      • A real number x>0. This initializes all parameters randomly between [-x,x] on the unconstrained parameter space;

      • The number 0. This initializes all parameters to 0;

      • A character vector of paths (one per chain) to JSON or Rdump files containing initial values for all or some parameters. See -write_stan_json() to write R objects to JSON files compatible with +write_stan_json() to write R objects to JSON files compatible with CmdStan.

      • A list of lists containing initial values for all or some parameters. For MCMC the list should contain a sublist for each chain. For optimization and @@ -293,601 +220,628 @@

        Arg has argument chain_id it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See Examples.

      • -
    save_latent_dynamics

    (logical) Should auxiliary diagnostic information + + + +

    save_latent_dynamics
    +

    (logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's diagnostic_file argument and the content written to CSV is controlled by the user's CmdStan installation and not CmdStanR (for some algorithms no content may be written). The default is FALSE, which is appropriate for almost every use case. To save the temporary files created when save_latent_dynamics=TRUE see the -$save_latent_dynamics_files() -method.

    output_dir

    (string) A path to a directory where CmdStan should write +$save_latent_dynamics_files() +method.

    + + +
    output_dir
    +

    (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at NULL (temporary directory) since CmdStanR makes the CmdStan output (posterior draws and diagnostics) available in R via methods of the fitted -model objects. The behavior of output_dir is as follows:

      -
    • If NULL (the default), then the CSV files are written to a temporary +model objects. The behavior of output_dir is as follows:

      • If NULL (the default), then the CSV files are written to a temporary directory and only saved permanently if the user calls one of the $save_* methods of the fitted model object (e.g., -$save_output_files()). These temporary +$save_output_files()). These temporary files are removed when the fitted model object is -garbage collected (manually or automatically).

      • +garbage collected (manually or automatically).

      • If a path, then the files are created in output_dir with names corresponding to the defaults used by $save_output_files().

      • -
    output_basename

    (string) A string to use as a prefix for the names of + + + +

    output_basename
    +

    (string) A string to use as a prefix for the names of the output CSV files of CmdStan. If NULL (the default), the basename of the output CSV files will be composed of the model name, timestamp, and -5 random characters.

    sig_figs

    (positive integer) The number of significant figures used +5 random characters.

    + + +
    sig_figs
    +

    (positive integer) The number of significant figures used when storing the output values. By default, CmdStan represents the output values with 6 significant figures. The upper limit for sig_figs is 18. Increasing this value will result in larger output CSV files and thus an -increased usage of disk space.

    chains

    (positive integer) The number of Markov chains to run. The -default is 4.

    parallel_chains

    (positive integer) The maximum number of MCMC chains +increased usage of disk space.

    + + +
    chains
    +

    (positive integer) The number of Markov chains to run. The +default is 4.

    + + +
    parallel_chains
    +

    (positive integer) The maximum number of MCMC chains to run in parallel. If parallel_chains is not specified then the default is to look for the option "mc.cores", which can be set for an entire R -session by options(mc.cores=value). If the "mc.cores" option has not -been set then the default is 1.

    chain_ids

    (integer vector) A vector of chain IDs. Must contain as many +session by options(mc.cores=value). If the "mc.cores" option has not +been set then the default is 1.

    + + +
    chain_ids
    +

    (integer vector) A vector of chain IDs. Must contain as many unique positive integers as the number of chains. If not set, the default -chain IDs are used (integers starting from 1).

    threads_per_chain

    (positive integer) If the model was -compiled with threading support, the number of +chain IDs are used (integers starting from 1).

    + + +
    threads_per_chain
    +

    (positive integer) If the model was +compiled with threading support, the number of threads to use in parallelized sections within an MCMC chain (e.g., when using the Stan functions reduce_sum() or map_rect()). This is in contrast with parallel_chains, which specifies the number of chains to run in parallel. The actual number of CPU cores used is parallel_chains*threads_per_chain. For an example of using threading see the Stan case study -Reduce Sum: A Minimal Example.

    opencl_ids

    (integer vector of length 2) The platform and +Reduce Sum: A Minimal Example.

    + + +
    opencl_ids
    +

    (integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must be compiled with cpp_options = list(stan_opencl = TRUE) for this -argument to have an effect.

    iter_warmup

    (positive integer) The number of warmup iterations to run +argument to have an effect.

    + + +
    iter_warmup
    +

    (positive integer) The number of warmup iterations to run per chain. Note: in the CmdStan User's Guide this is referred to as -num_warmup.

    iter_sampling

    (positive integer) The number of post-warmup iterations +num_warmup.

    + + +
    iter_sampling
    +

    (positive integer) The number of post-warmup iterations to run per chain. Note: in the CmdStan User's Guide this is referred to as -num_samples.

    save_warmup

    (logical) Should warmup iterations be saved? The default -is FALSE.

    thin

    (positive integer) The period between saved samples. This should -typically be left at its default (no thinning) unless memory is a problem.

    max_treedepth

    (positive integer) The maximum allowed tree depth for +num_samples.

    + + +
    save_warmup
    +

    (logical) Should warmup iterations be saved? The default +is FALSE.

    + + +
    thin
    +

    (positive integer) The period between saved samples. This should +typically be left at its default (no thinning) unless memory is a problem.

    + + +
    max_treedepth
    +

    (positive integer) The maximum allowed tree depth for the NUTS engine. See the Tree Depth section of the CmdStan User's Guide -for more details.

    adapt_engaged

    (logical) Do warmup adaptation? The default is TRUE. +for more details.

    + + +
    adapt_engaged
    +

    (logical) Do warmup adaptation? The default is TRUE. If a precomputed inverse metric is specified via the inv_metric argument (or metric_file) then, if adapt_engaged=TRUE, Stan will use the provided inverse metric just as an initial guess during adaptation. To turn off adaptation when using a precomputed inverse metric set -adapt_engaged=FALSE.

    adapt_delta

    (real in (0,1)) The adaptation target acceptance -statistic.

    step_size

    (positive real) The initial step size for the discrete +adapt_engaged=FALSE.

    + + +
    adapt_delta
    +

    (real in (0,1)) The adaptation target acceptance +statistic.

    + + +
    step_size
    +

    (positive real) The initial step size for the discrete approximation to continuous Hamiltonian dynamics. This is further tuned -during warmup.

    metric

    (string) One of "diag_e", "dense_e", or "unit_e", +during warmup.

    + + +
    metric
    +

    (string) One of "diag_e", "dense_e", or "unit_e", specifying the geometry of the base manifold. See the Euclidean Metric section of the CmdStan User's Guide for more details. To specify a -precomputed (inverse) metric, see the inv_metric argument below.

    metric_file

    (character vector) The paths to JSON or +precomputed (inverse) metric, see the inv_metric argument below.

    + + +
    metric_file
    +

    (character vector) The paths to JSON or Rdump files (one per chain) compatible with CmdStan that contain precomputed inverse metrics. The metric_file argument is inherited from CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be named inv_metric, referring to the inverse metric. We recommend instead using CmdStanR's inv_metric argument (see below) to specify an inverse -metric directly using a vector or matrix from your R session.

    inv_metric

    (vector, matrix) A vector (if metric='diag_e') or a +metric directly using a vector or matrix from your R session.

    + + +
    inv_metric
    +

    (vector, matrix) A vector (if metric='diag_e') or a matrix (if metric='dense_e') for initializing the inverse metric. This can be used as an alternative to the metric_file argument. A vector is interpreted as a diagonal metric. The inverse metric is usually set to an estimate of the posterior covariance. See the adapt_engaged argument above for details about (and control over) how specifying a precomputed -inverse metric interacts with adaptation.

    init_buffer

    (nonnegative integer) Width of initial fast timestep -adaptation interval during warmup.

    term_buffer

    (nonnegative integer) Width of final fast timestep -adaptation interval during warmup.

    window

    (nonnegative integer) Initial width of slow timestep/metric -adaptation interval.

    fixed_param

    (logical) When TRUE, call CmdStan with argument +inverse metric interacts with adaptation.

    + + +
    init_buffer
    +

    (nonnegative integer) Width of initial fast timestep +adaptation interval during warmup.

    + + +
    term_buffer
    +

    (nonnegative integer) Width of final fast timestep +adaptation interval during warmup.

    + + +
    window
    +

    (nonnegative integer) Initial width of slow timestep/metric +adaptation interval.

    + + +
    fixed_param
    +

    (logical) When TRUE, call CmdStan with argument "algorithm=fixed_param". The default is FALSE. The fixed parameter sampler generates a new sample without changing the current state of the Markov chain; only generated quantities may change. This can be useful when, for example, trying to generate pseudo-data using the generated quantities block. If the parameters block is empty then using fixed_param=TRUE is mandatory. When fixed_param=TRUE the chains and -parallel_chains arguments will be set to 1.

    show_messages

    (logical) When TRUE (the default), prints all +parallel_chains arguments will be set to 1.

    + + +
    show_messages
    +

    (logical) When TRUE (the default), prints all +output during the sampling process, such as iteration numbers and elapsed times. +If the output is silenced then the $output() method of +the resulting fit object can be used to display the silenced messages.

    + + +
    show_exceptions
    +

    (logical) When TRUE (the default), prints all informational messages, for example rejection of the current proposal. Disable if you wish to silence these messages, but this is not usually recommended unless you are very confident that the model is correct up to numerical error. If the messages are silenced then the -$output() method of the resulting fit object can be -used to display the silenced messages.

    diagnostics

    (character vector) The diagnostics to automatically check +$output() method of the resulting fit object can be +used to display the silenced messages.

    + + +
    diagnostics
    +

    (character vector) The diagnostics to automatically check and warn about after sampling. Setting this to an empty string "" or NULL can be used to prevent CmdStanR from automatically reading in the sampler diagnostics from CSV if you wish to manually read in the results -and validate them yourself, for example using read_cmdstan_csv(). The +and validate them yourself, for example using read_cmdstan_csv(). The currently available diagnostics are "divergences", "treedepth", and "ebfmi" (the default is to check all of them).

    These diagnostics are also available after fitting. The -$sampler_diagnostics() method provides +$sampler_diagnostics() method provides access to the diagnostic values for each iteration and the -$diagnostic_summary() method provides +$diagnostic_summary() method provides summaries of the diagnostics and can regenerate the warning messages.

    Diagnostics like R-hat and effective sample size are not currently available via the diagnostics argument but can be checked after fitting -using the $summary() method.

    cores, num_cores, num_chains, num_warmup, num_samples, save_extra_diagnostics, max_depth, stepsize, validate_csv

    Deprecated and will be removed in a future release.

    +using the $summary() method.

    + -

    Value

    +
    cores, num_cores, num_chains, num_warmup, num_samples, save_extra_diagnostics, max_depth, stepsize, validate_csv
    +

    Deprecated and will be removed in a future release.

    -

    A CmdStanMCMC object.

    -

    See also

    +
    +
    +

    Value

    + -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online +

    A CmdStanMCMC object.

    +
    +
    +

    See also

    +

    The CmdStanR website +(mc-stan.org/cmdstanr) for online documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    - -

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-compile, -model-method-diagnose, -model-method-format, -model-method-generate-quantities, -model-method-optimize, -model-method-sample_mpi, -model-method-variables, -model-method-variational

    - -

    Examples

    -
    # \dontrun{ -library(cmdstanr) -library(posterior) -library(bayesplot) -color_scheme_set("brightblue") - -# Set path to CmdStan -# (Note: if you installed CmdStan via install_cmdstan() with default settings -# then setting the path is unnecessary but the default below should still work. -# Otherwise use the `path` argument to specify the location of your -# CmdStan installation.) -set_cmdstan_path(path = NULL) -
    #> CmdStan path set to: /Users/jgabry/.cmdstan/cmdstan-2.29.1
    -# Create a CmdStanModel object from a Stan program, -# here using the example model that comes with CmdStan -file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan") -mod <- cmdstan_model(file) -mod$print() -
    #> data { -#> int<lower=0> N; -#> array[N] int<lower=0,upper=1> y; // or int<lower=0,upper=1> y[N]; -#> } -#> parameters { -#> real<lower=0,upper=1> theta; -#> } -#> model { -#> theta ~ beta(1,1); // uniform prior on interval 0,1 -#> y ~ bernoulli(theta); -#> }
    -# Data as a named list (like RStan) -stan_data <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) - -# Run MCMC using the 'sample' method -fit_mcmc <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - parallel_chains = 2 -) -
    #> Running MCMC with 2 parallel chains... -#> -#> Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup) -#> Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup) -#> Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup) -#> Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup) -#> Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup) -#> Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup) -#> Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup) -#> Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup) -#> Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup) -#> Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup) -#> Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup) -#> Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling) -#> Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling) -#> Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling) -#> Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling) -#> Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling) -#> Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling) -#> Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling) -#> Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling) -#> Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling) -#> Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling) -#> Chain 1 Iteration: 2000 / 2000 [100%] (Sampling) -#> Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup) -#> Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup) -#> Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup) -#> Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup) -#> Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup) -#> Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup) -#> Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup) -#> Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup) -#> Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup) -#> Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup) -#> Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup) -#> Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling) -#> Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling) -#> Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling) -#> Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling) -#> Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling) -#> 
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling) -#> Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling) -#> Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling) -#> Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling) -#> Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling) -#> Chain 2 Iteration: 2000 / 2000 [100%] (Sampling) -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.2 seconds. -#>
    -# Use 'posterior' package for summaries -fit_mcmc$summary() -
    #> # A tibble: 2 × 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -7.30 -7.03 0.721 0.380 -8.82 -6.75 1.00 902. 1006. -#> 2 theta 0.247 0.233 0.122 0.129 0.0786 0.470 1.00 762. 712.
    -# Get posterior draws -draws <- fit_mcmc$draws() -print(draws) -
    #> # A draws_array: 1000 iterations, 2 chains, and 2 variables -#> , , variable = lp__ -#> -#> chain -#> iteration 1 2 -#> 1 -6.8 -6.8 -#> 2 -6.9 -6.8 -#> 3 -7.0 -7.0 -#> 4 -6.9 -7.1 -#> 5 -6.7 -7.0 -#> -#> , , variable = theta -#> -#> chain -#> iteration 1 2 -#> 1 0.28 0.21 -#> 2 0.19 0.20 -#> 3 0.16 0.17 -#> 4 0.20 0.36 -#> 5 0.25 0.34 -#> -#> # ... with 995 more iterations
    -# Convert to data frame using posterior::as_draws_df -as_draws_df(draws) -
    #> # A draws_df: 1000 iterations, 2 chains, and 2 variables -#> lp__ theta -#> 1 -6.8 0.28 -#> 2 -6.9 0.19 -#> 3 -7.0 0.16 -#> 4 -6.9 0.20 -#> 5 -6.7 0.25 -#> 6 -7.1 0.36 -#> 7 -9.0 0.55 -#> 8 -7.2 0.15 -#> 9 -6.8 0.23 -#> 10 -7.5 0.42 -#> # ... with 1990 more draws -#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    -# Plot posterior using bayesplot (ggplot2) -mcmc_hist(fit_mcmc$draws("theta")) -
    #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    -# Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose() -
    #> Processing csv files: /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/bernoulli-202203181226-1-05e2b0.csv, /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/bernoulli-202203181226-2-05e2b0.csv -#> -#> Checking sampler transitions treedepth. -#> Treedepth satisfactory for all transitions. -#> -#> Checking sampler transitions for divergences. -#> No divergent transitions found. -#> -#> Checking E-BFMI - sampler transitions HMC potential energy. -#> E-BFMI satisfactory. -#> -#> Effective sample size satisfactory. -#> -#> Split R-hat values satisfactory all parameters. -#> -#> Processing complete, no problems detected.
    fit_mcmc$cmdstan_summary() -
    #> Inference for Stan model: bernoulli_model -#> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. -#> -#> Warmup took (0.0050, 0.0050) seconds, 0.010 seconds total -#> Sampling took (0.015, 0.014) seconds, 0.029 seconds total -#> -#> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat -#> -#> lp__ -7.3 2.6e-02 0.72 -8.8 -7.0 -6.8 781 26932 1.0 -#> accept_stat__ 0.92 8.3e-03 0.13 0.64 0.97 1.0 2.3e+02 8.1e+03 1.0e+00 -#> stepsize__ 0.95 7.9e-02 0.079 0.87 1.0 1.0 1.0e+00 3.5e+01 2.0e+13 -#> treedepth__ 1.4 1.1e-02 0.48 1.0 1.0 2.0 1.9e+03 6.5e+04 1.0e+00 -#> n_leapfrog__ 2.5 1.4e-01 1.3 1.0 3.0 3.0 8.9e+01 3.1e+03 1.0e+00 -#> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 3.6e-02 1.00 6.8 7.5 9.6 7.7e+02 2.7e+04 1.0e+00 -#> -#> theta 0.25 4.3e-03 0.12 0.079 0.23 0.47 796 27460 1.0 -#> -#> Samples were drawn using hmc with nuts. -#> For each parameter, N_Eff is a crude measure of effective sample size, -#> and R_hat is the potential scale reduction factor on split chains (at -#> convergence, R_hat=1).
    -# For models fit using MCMC, if you like working with RStan's stanfit objects -# then you can create one with rstan::read_stan_csv() - -# stanfit <- rstan::read_stan_csv(fit_mcmc$output_files()) - - -# Run 'optimize' method to get a point estimate (default is Stan's LBFGS algorithm) -# and also demonstrate specifying data as a path to a file instead of a list -my_data_file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.data.json") -fit_optim <- mod$optimize(data = my_data_file, seed = 123) -
    #> Initial log joint probability = -9.51104 -#> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 6 -5.00402 0.000103557 2.55661e-07 1 1 9 -#> Optimization terminated normally: -#> Convergence detected: relative gradient magnitude is below tolerance -#> Finished in 0.1 seconds.
    -fit_optim$summary() -
    #> # A tibble: 2 × 2 -#> variable estimate -#> <chr> <dbl> -#> 1 lp__ -5.00 -#> 2 theta 0.2
    - -# Run 'variational' method to approximate the posterior (default is meanfield ADVI) -fit_vb <- mod$variational(data = stan_data, seed = 123) -
    #> ------------------------------------------------------------ -#> EXPERIMENTAL ALGORITHM: -#> This procedure has not been thoroughly tested and may be unstable -#> or buggy. The interface is subject to change. -#> ------------------------------------------------------------ -#> Gradient evaluation took 8e-06 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.08 seconds. -#> Adjust your expectations accordingly! -#> Begin eta adaptation. -#> Iteration: 1 / 250 [ 0%] (Adaptation) -#> Iteration: 50 / 250 [ 20%] (Adaptation) -#> Iteration: 100 / 250 [ 40%] (Adaptation) -#> Iteration: 150 / 250 [ 60%] (Adaptation) -#> Iteration: 200 / 250 [ 80%] (Adaptation) -#> Success! Found best value [eta = 1] earlier than expected. -#> Begin stochastic gradient ascent. -#> iter ELBO delta_ELBO_mean delta_ELBO_med notes -#> 100 -6.262 1.000 1.000 -#> 200 -6.263 0.500 1.000 -#> 300 -6.307 0.336 0.007 MEDIAN ELBO CONVERGED -#> Drawing a sample of size 1000 from the approximate posterior... -#> COMPLETED. -#> Finished in 0.1 seconds.
    -fit_vb$summary() -
    #> # A tibble: 3 × 7 -#> variable mean median sd mad q5 q95 -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -7.18 -6.94 0.588 0.259 -8.36 -6.75 -#> 2 lp_approx__ -0.515 -0.221 0.692 0.303 -2.06 -0.00257 -#> 3 theta 0.263 0.246 0.115 0.113 0.106 0.481
    -# Plot approximate posterior using bayesplot -mcmc_hist(fit_vb$draws("theta")) -
    #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    - -# Specifying initial values as a function -fit_mcmc_w_init_fun <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = function() list(theta = runif(1)) -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_mcmc_w_init_fun_2 <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = function(chain_id) { - # silly but demonstrates optional use of chain_id - list(theta = 1 / (chain_id + 1)) - } -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_mcmc_w_init_fun_2$init() -
    #> [[1]] -#> [[1]]$theta -#> [1] 0.5 -#> -#> -#> [[2]] -#> [[2]]$theta -#> [1] 0.3333333 -#> -#>
    -# Specifying initial values as a list of lists -fit_mcmc_w_init_list <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = list( - list(theta = 0.75), # chain 1 - list(theta = 0.25) # chain 2 - ) -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_optim_w_init_list <- mod$optimize( - data = stan_data, - seed = 123, - init = list( - list(theta = 0.75) - ) -) -
    #> Initial log joint probability = -11.6657 -#> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 6 -5.00402 0.000237915 9.55309e-07 1 1 9 -#> Optimization terminated normally: -#> Convergence detected: relative gradient magnitude is below tolerance -#> Finished in 0.3 seconds.
    fit_optim_w_init_list$init() -
    #> [[1]] -#> [[1]]$theta -#> [1] 0.75 -#> -#>
    # } - -
    +

    The Stan and CmdStan documentation:

    Other CmdStanModel methods: +model-method-check_syntax, +model-method-compile, +model-method-diagnose, +model-method-expose_functions, +model-method-format, +model-method-generate-quantities, +model-method-optimize, +model-method-sample_mpi, +model-method-variables, +model-method-variational

    +
    + +
    +

    Examples

    +
    # \dontrun{
    +library(cmdstanr)
    +library(posterior)
    +library(bayesplot)
    +color_scheme_set("brightblue")
    +
    +# Set path to CmdStan
    +# (Note: if you installed CmdStan via install_cmdstan() with default settings
    +# then setting the path is unnecessary but the default below should still work.
    +# Otherwise use the `path` argument to specify the location of your
    +# CmdStan installation.)
    +set_cmdstan_path(path = NULL)
    +#> CmdStan path set to: /Users/jgabry/.cmdstan/cmdstan-2.32.2
    +
    +# Create a CmdStanModel object from a Stan program,
    +# here using the example model that comes with CmdStan
    +file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan")
    +mod <- cmdstan_model(file)
    +mod$print()
    +#> data {
    +#>   int<lower=0> N;
    +#>   array[N] int<lower=0,upper=1> y;
    +#> }
    +#> parameters {
    +#>   real<lower=0,upper=1> theta;
    +#> }
    +#> model {
    +#>   theta ~ beta(1,1);  // uniform prior on interval 0,1
    +#>   y ~ bernoulli(theta);
    +#> }
    +
    +# Data as a named list (like RStan)
    +stan_data <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1))
    +
    +# Run MCMC using the 'sample' method
    +fit_mcmc <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  parallel_chains = 2
    +)
    +#> Running MCMC with 2 parallel chains...
    +#> 
    +#> Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
    +#> Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
    +#> Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
    +#> Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
    +#> Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
    +#> Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
    +#> Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
    +#> Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
    +#> Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
    +#> Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
    +#> Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
    +#> Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
    +#> Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
    +#> Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
    +#> Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
    +#> Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
    +#> Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
    +#> Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
    +#> Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
    +#> Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
    +#> Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
    +#> Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
    +#> Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup) 
    +#> Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
    +#> Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
    +#> Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
    +#> Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
    +#> Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
    +#> Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
    +#> Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
    +#> Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
    +#> Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
    +#> Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
    +#> Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
    +#> Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
    +#> Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
    +#> Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
    +#> Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
    +#> Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
    +#> Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
    +#> Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
    +#> Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
    +#> Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
    +#> Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.2 seconds.
    +#> 
    +
    +# Use 'posterior' package for summaries
    +fit_mcmc$summary()
    +#> # A tibble: 2 × 10
    +#>   variable   mean median    sd   mad      q5    q95  rhat ess_bulk ess_tail
    +#>   <chr>     <num>  <num> <num> <num>   <num>  <num> <num>    <num>    <num>
    +#> 1 lp__     -7.30  -7.03  0.721 0.380 -8.82   -6.75   1.00     902.    1006.
    +#> 2 theta     0.247  0.233 0.122 0.129  0.0786  0.470  1.00     762.     712.
    +
    +# Get posterior draws
    +draws <- fit_mcmc$draws()
    +print(draws)
    +#> # A draws_array: 1000 iterations, 2 chains, and 2 variables
    +#> , , variable = lp__
    +#> 
    +#>          chain
    +#> iteration    1    2
    +#>         1 -6.8 -6.8
    +#>         2 -6.9 -6.8
    +#>         3 -7.0 -7.0
    +#>         4 -6.9 -7.1
    +#>         5 -6.7 -7.0
    +#> 
    +#> , , variable = theta
    +#> 
    +#>          chain
    +#> iteration    1    2
    +#>         1 0.28 0.21
    +#>         2 0.19 0.20
    +#>         3 0.16 0.17
    +#>         4 0.20 0.36
    +#>         5 0.25 0.34
    +#> 
    +#> # ... with 995 more iterations
    +
    +# Convert to data frame using posterior::as_draws_df
    +as_draws_df(draws)
    +#> # A draws_df: 1000 iterations, 2 chains, and 2 variables
    +#>    lp__ theta
    +#> 1  -6.8  0.28
    +#> 2  -6.9  0.19
    +#> 3  -7.0  0.16
    +#> 4  -6.9  0.20
    +#> 5  -6.7  0.25
    +#> 6  -7.1  0.36
    +#> 7  -9.0  0.55
    +#> 8  -7.2  0.15
    +#> 9  -6.8  0.23
    +#> 10 -7.5  0.42
    +#> # ... with 1990 more draws
    +#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    +
    +# Plot posterior using bayesplot (ggplot2)
    +mcmc_hist(fit_mcmc$draws("theta"))
    +#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    +
    +
    +# Call CmdStan's diagnose and stansummary utilities
    +fit_mcmc$cmdstan_diagnose()
    +#> Processing csv files: /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/bernoulli-202307251438-1-239737.csv, /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/bernoulli-202307251438-2-239737.csv
    +#> 
    +#> Checking sampler transitions treedepth.
    +#> Treedepth satisfactory for all transitions.
    +#> 
    +#> Checking sampler transitions for divergences.
    +#> No divergent transitions found.
    +#> 
    +#> Checking E-BFMI - sampler transitions HMC potential energy.
    +#> E-BFMI satisfactory.
    +#> 
    +#> Effective sample size satisfactory.
    +#> 
    +#> Split R-hat values satisfactory all parameters.
    +#> 
    +#> Processing complete, no problems detected.
    +fit_mcmc$cmdstan_summary()
    +#> Inference for Stan model: bernoulli_model
    +#> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved.
    +#> 
    +#> Warmup took (0.0040, 0.0040) seconds, 0.0080 seconds total
    +#> Sampling took (0.011, 0.011) seconds, 0.022 seconds total
    +#> 
    +#>                 Mean     MCSE  StdDev     5%   50%   95%  N_Eff  N_Eff/s    R_hat
    +#> 
    +#> lp__            -7.3  2.6e-02    0.72   -8.8  -7.0  -6.8    781    35502      1.0
    +#> accept_stat__   0.92  8.3e-03    0.13   0.64  0.97   1.0    235    10662  1.0e+00
    +#> stepsize__      0.95  7.9e-02   0.079   0.87   1.0   1.0    1.0       46  2.0e+13
    +#> treedepth__      1.4  1.1e-02    0.48    1.0   1.0   2.0   1874    85179  1.0e+00
    +#> n_leapfrog__     2.5  1.4e-01     1.3    1.0   3.0   3.0     89     4050  1.0e+00
    +#> divergent__     0.00      nan    0.00   0.00  0.00  0.00    nan      nan      nan
    +#> energy__         7.8  3.6e-02    1.00    6.8   7.5   9.6    775    35215  1.0e+00
    +#> 
    +#> theta           0.25  4.3e-03    0.12  0.079  0.23  0.47    796    36197      1.0
    +#> 
    +#> Samples were drawn using hmc with nuts.
    +#> For each parameter, N_Eff is a crude measure of effective sample size,
    +#> and R_hat is the potential scale reduction factor on split chains (at 
    +#> convergence, R_hat=1).
    +
    +# For models fit using MCMC, if you like working with RStan's stanfit objects
    +# then you can create one with rstan::read_stan_csv()
    +
    +# stanfit <- rstan::read_stan_csv(fit_mcmc$output_files())
    +
    +
    +# Run 'optimize' method to get a point estimate (default is Stan's LBFGS algorithm)
    +# and also demonstrate specifying data as a path to a file instead of a list
    +my_data_file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.data.json")
    +fit_optim <- mod$optimize(data = my_data_file, seed = 123)
    +#> Initial log joint probability = -9.51104 
    +#>     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes  
    +#>        6      -5.00402   0.000103557   2.55661e-07           1           1        9    
    +#> Optimization terminated normally:  
    +#>   Convergence detected: relative gradient magnitude is below tolerance 
    +#> Finished in  0.1 seconds.
    +
    +fit_optim$summary()
    +#> # A tibble: 2 × 2
    +#>   variable estimate
    +#>   <chr>       <num>
    +#> 1 lp__        -5.00
    +#> 2 theta        0.2 
    +
    +
    +# Run 'variational' method to approximate the posterior (default is meanfield ADVI)
    +fit_vb <- mod$variational(data = stan_data, seed = 123)
    +#> ------------------------------------------------------------ 
    +#> EXPERIMENTAL ALGORITHM: 
    +#>   This procedure has not been thoroughly tested and may be unstable 
    +#>   or buggy. The interface is subject to change. 
    +#> ------------------------------------------------------------ 
    +#> Gradient evaluation took 9e-06 seconds 
    +#> 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds. 
    +#> Adjust your expectations accordingly! 
    +#> Begin eta adaptation. 
    +#> Iteration:   1 / 250 [  0%]  (Adaptation) 
    +#> Iteration:  50 / 250 [ 20%]  (Adaptation) 
    +#> Iteration: 100 / 250 [ 40%]  (Adaptation) 
    +#> Iteration: 150 / 250 [ 60%]  (Adaptation) 
    +#> Iteration: 200 / 250 [ 80%]  (Adaptation) 
    +#> Success! Found best value [eta = 1] earlier than expected. 
    +#> Begin stochastic gradient ascent. 
    +#>   iter             ELBO   delta_ELBO_mean   delta_ELBO_med   notes  
    +#>    100           -6.262             1.000            1.000 
    +#>    200           -6.263             0.500            1.000 
    +#>    300           -6.307             0.336            0.007   MEDIAN ELBO CONVERGED 
    +#> Drawing a sample of size 1000 from the approximate posterior...  
    +#> COMPLETED. 
    +#> Finished in  0.1 seconds.
    +
    +fit_vb$summary()
    +#> # A tibble: 3 × 7
    +#>   variable      mean median    sd   mad     q5      q95
    +#>   <chr>        <num>  <num> <num> <num>  <num>    <num>
    +#> 1 lp__        -7.18  -6.94  0.588 0.259 -8.36  -6.75   
    +#> 2 lp_approx__ -0.515 -0.221 0.692 0.303 -2.06  -0.00257
    +#> 3 theta        0.263  0.246 0.115 0.113  0.106  0.481  
    +
    +# Plot approximate posterior using bayesplot
    +mcmc_hist(fit_vb$draws("theta"))
    +#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    +
    +
    +
    +# Specifying initial values as a function
    +fit_mcmc_w_init_fun <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = function() list(theta = runif(1))
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_mcmc_w_init_fun_2 <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = function(chain_id) {
    +    # silly but demonstrates optional use of chain_id
    +    list(theta = 1 / (chain_id + 1))
    +  }
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_mcmc_w_init_fun_2$init()
    +#> [[1]]
    +#> [[1]]$theta
    +#> [1] 0.5
    +#> 
    +#> 
    +#> [[2]]
    +#> [[2]]$theta
    +#> [1] 0.3333333
    +#> 
    +#> 
    +
    +# Specifying initial values as a list of lists
    +fit_mcmc_w_init_list <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = list(
    +    list(theta = 0.75), # chain 1
    +    list(theta = 0.25)  # chain 2
    +  )
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_optim_w_init_list <- mod$optimize(
    +  data = stan_data,
    +  seed = 123,
    +  init = list(
    +    list(theta = 0.75)
    +  )
    +)
    +#> Initial log joint probability = -11.6657 
    +#>     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes  
    +#>        6      -5.00402   0.000237915   9.55309e-07           1           1        9    
    +#> Optimization terminated normally:  
    +#>   Convergence detected: relative gradient magnitude is below tolerance 
    +#> Finished in  0.1 seconds.
    +fit_optim_w_init_list$init()
    +#> [[1]]
    +#> [[1]]$theta
    +#> [1] 0.75
    +#> 
    +#> 
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-sample_mpi.html b/docs/reference/model-method-sample_mpi.html index 228dde82b..53d2eb1a3 100644 --- a/docs/reference/model-method-sample_mpi.html +++ b/docs/reference/model-method-sample_mpi.html @@ -1,53 +1,5 @@ - - - - - - - -Run Stan's MCMC algorithms with MPI — model-method-sample_mpi • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run Stan's MCMC algorithms with MPI — model-method-sample_mpi • cmdstanr - - + + - - -
    -
    - -
    - -
    +
    -

    The $sample_mpi() method of a CmdStanModel object is +

    The $sample_mpi() method of a CmdStanModel object is identical to the $sample() method but with support for -MPI. The target audience for MPI are +MPI. The target audience for MPI are those with large computer clusters. For other users, the -$sample() method provides both parallelization of +$sample() method provides both parallelization of chains and threading support for within-chain parallelization.

    In order to use MPI with Stan, an MPI implementation must be installed. For Unix systems the most commonly used implementations are MPICH and OpenMPI. The implementations provide an MPI C++ compiler wrapper (for example mpicxx), which is required to compile the model.

    -

    An example of compiling with MPI:

    mpi_options = list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc")
    -mod = cmdstan_model("model.stan", cpp_options = mpi_options)
    -
    - +

    An example of compiling with MPI:

    +

    mpi_options = list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc")
    +mod = cmdstan_model("model.stan", cpp_options = mpi_options)

    The C++ options that must be supplied to the -compile call are:

      -
    • STAN_MPI: Enables the use of MPI with Stan if TRUE.

    • +compile call are:

      • STAN_MPI: Enables the use of MPI with Stan if TRUE.

      • CXX: The name of the MPI C++ compiler wrapper. Typically "mpicxx".

      • TBB_CXX_TYPE: The C++ compiler the MPI wrapper wraps. Typically "gcc" on Linux and "clang" on macOS.

      • -
      - -

      In the call to the $sample_mpi() method it is also possible to provide +

    In the call to the $sample_mpi() method it is also possible to provide the name of the MPI launcher (mpi_cmd, defaulting to "mpiexec") and any other MPI launch arguments (mpi_args). In most cases, it is enough to only define the number of processes. To use n_procs processes specify mpi_args = list("n" = n_procs).

    -
    sample_mpi(
    -  data = NULL,
    -  mpi_cmd = "mpiexec",
    -  mpi_args = NULL,
    -  seed = NULL,
    -  refresh = NULL,
    -  init = NULL,
    -  save_latent_dynamics = FALSE,
    -  output_dir = NULL,
    -  output_basename = NULL,
    -  chains = 1,
    -  chain_ids = seq_len(chains),
    -  iter_warmup = NULL,
    -  iter_sampling = NULL,
    -  save_warmup = FALSE,
    -  thin = NULL,
    -  max_treedepth = NULL,
    -  adapt_engaged = TRUE,
    -  adapt_delta = NULL,
    -  step_size = NULL,
    -  metric = NULL,
    -  metric_file = NULL,
    -  inv_metric = NULL,
    -  init_buffer = NULL,
    -  term_buffer = NULL,
    -  window = NULL,
    -  fixed_param = FALSE,
    -  sig_figs = NULL,
    -  show_messages = TRUE,
    -  diagnostics = c("divergences", "treedepth", "ebfmi"),
    -  validate_csv = TRUE
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    data

    (multiple options) The data to use for the variables specified in -the data block of the Stan program. One of the following:

      -
    • A named list of R objects with the names corresponding to variables +

      +
      sample_mpi(
      +  data = NULL,
      +  mpi_cmd = "mpiexec",
      +  mpi_args = NULL,
      +  seed = NULL,
      +  refresh = NULL,
      +  init = NULL,
      +  save_latent_dynamics = FALSE,
      +  output_dir = NULL,
      +  output_basename = NULL,
      +  chains = 1,
      +  chain_ids = seq_len(chains),
      +  iter_warmup = NULL,
      +  iter_sampling = NULL,
      +  save_warmup = FALSE,
      +  thin = NULL,
      +  max_treedepth = NULL,
      +  adapt_engaged = TRUE,
      +  adapt_delta = NULL,
      +  step_size = NULL,
      +  metric = NULL,
      +  metric_file = NULL,
      +  inv_metric = NULL,
      +  init_buffer = NULL,
      +  term_buffer = NULL,
      +  window = NULL,
      +  fixed_param = FALSE,
      +  sig_figs = NULL,
      +  show_messages = TRUE,
      +  show_exceptions = TRUE,
      +  diagnostics = c("divergences", "treedepth", "ebfmi"),
      +  validate_csv = TRUE
      +)
      +
      + +
      +

      Arguments

      +
      data
      +

      (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

      • A named list of R objects with the names corresponding to variables declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using write_stan_json(). See -write_stan_json() for details on the conversions performed on R objects +written to JSON for CmdStan using write_stan_json(). See +write_stan_json() for details on the conversions performed on R objects before they are passed to Stan.

      • A path to a data file compatible with CmdStan (JSON or R dump). See the appendices in the CmdStan guide for details on using these formats.

      • NULL or an empty list if the Stan program has no data block.

      • -
    mpi_cmd

    (string) The MPI launcher used for launching MPI -processes. The default launcher is "mpiexec".

    mpi_args

    (list) A list of arguments to use when launching MPI + + + +

    mpi_cmd
    +

    (string) The MPI launcher used for launching MPI +processes. The default launcher is "mpiexec".

    + + +
    mpi_args
    +

    (list) A list of arguments to use when launching MPI processes. For example, mpi_args = list("n" = 4) launches the executable as mpiexec -n 4 model_executable, followed by CmdStan arguments for the -model executable.

    seed

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. +model executable.

    + + +
    seed
    +

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single seed will automatically be augmented by the run (chain) ID so that each chain uses a different seed. The exception is the transformed data block, which defaults to @@ -303,25 +227,24 @@

    Arg chains if RNG functions are used. The only time seed should be specified as a vector (one element per chain) is if RNG functions are used in transformed data and the goal is to generate different data for each -chain.

    refresh

    (non-negative integer) The number of iterations between +chain.

    + + +
    refresh
    +

    (non-negative integer) The number of iterations between printed screen updates. If refresh = 0, only error messages will be -printed.

    init

    (multiple options) The initialization method to use for the +printed.

    + + +
    init
    +

    (multiple options) The initialization method to use for the variables declared in the parameters block of the Stan program. One of -the following:

      -
    • A real number x>0. This initializes all parameters randomly between +the following:

      • A real number x>0. This initializes all parameters randomly between [-x,x] on the unconstrained parameter space;

      • The number 0. This initializes all parameters to 0;

      • A character vector of paths (one per chain) to JSON or Rdump files containing initial values for all or some parameters. See -write_stan_json() to write R objects to JSON files compatible with +write_stan_json() to write R objects to JSON files compatible with CmdStan.

      • A list of lists containing initial values for all or some parameters. For MCMC the list should contain a sublist for each chain. For optimization and @@ -334,258 +257,264 @@

        Arg has argument chain_id it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See Examples.

      • -
    save_latent_dynamics

    (logical) Should auxiliary diagnostic information + + + +

    save_latent_dynamics
    +

    (logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's diagnostic_file argument and the content written to CSV is controlled by the user's CmdStan installation and not CmdStanR (for some algorithms no content may be written). The default is FALSE, which is appropriate for almost every use case. To save the temporary files created when save_latent_dynamics=TRUE see the -$save_latent_dynamics_files() -method.

    output_dir

    (string) A path to a directory where CmdStan should write +$save_latent_dynamics_files() +method.

    + + +
    output_dir
    +

    (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at NULL (temporary directory) since CmdStanR makes the CmdStan output (posterior draws and diagnostics) available in R via methods of the fitted -model objects. The behavior of output_dir is as follows:

      -
    • If NULL (the default), then the CSV files are written to a temporary +model objects. The behavior of output_dir is as follows:

      • If NULL (the default), then the CSV files are written to a temporary directory and only saved permanently if the user calls one of the $save_* methods of the fitted model object (e.g., -$save_output_files()). These temporary +$save_output_files()). These temporary files are removed when the fitted model object is -garbage collected (manually or automatically).

      • +garbage collected (manually or automatically).

      • If a path, then the files are created in output_dir with names corresponding to the defaults used by $save_output_files().

      • -
    output_basename

    (string) A string to use as a prefix for the names of + + + +

    output_basename
    +

    (string) A string to use as a prefix for the names of the output CSV files of CmdStan. If NULL (the default), the basename of the output CSV files will be composed of the model name, timestamp, and -5 random characters.

    chains

    (positive integer) The number of Markov chains to run. The -default is 4.

    chain_ids

    (integer vector) A vector of chain IDs. Must contain as many +5 random characters.

    + + +
    chains
    +

    (positive integer) The number of Markov chains to run. The +default is 4.

    + + +
    chain_ids
    +

    (integer vector) A vector of chain IDs. Must contain as many unique positive integers as the number of chains. If not set, the default -chain IDs are used (integers starting from 1).

    iter_warmup

    (positive integer) The number of warmup iterations to run +chain IDs are used (integers starting from 1).

    + + +
    iter_warmup
    +

    (positive integer) The number of warmup iterations to run per chain. Note: in the CmdStan User's Guide this is referred to as -num_warmup.

    iter_sampling

    (positive integer) The number of post-warmup iterations +num_warmup.

    + + +
    iter_sampling
    +

    (positive integer) The number of post-warmup iterations to run per chain. Note: in the CmdStan User's Guide this is referred to as -num_samples.

    save_warmup

    (logical) Should warmup iterations be saved? The default -is FALSE.

    thin

    (positive integer) The period between saved samples. This should -typically be left at its default (no thinning) unless memory is a problem.

    max_treedepth

    (positive integer) The maximum allowed tree depth for +num_samples.

    + + +
    save_warmup
    +

    (logical) Should warmup iterations be saved? The default +is FALSE.

    + + +
    thin
    +

    (positive integer) The period between saved samples. This should +typically be left at its default (no thinning) unless memory is a problem.

    + + +
    max_treedepth
    +

    (positive integer) The maximum allowed tree depth for the NUTS engine. See the Tree Depth section of the CmdStan User's Guide -for more details.

    adapt_engaged

    (logical) Do warmup adaptation? The default is TRUE. +for more details.

    + + +
    adapt_engaged
    +

    (logical) Do warmup adaptation? The default is TRUE. If a precomputed inverse metric is specified via the inv_metric argument (or metric_file) then, if adapt_engaged=TRUE, Stan will use the provided inverse metric just as an initial guess during adaptation. To turn off adaptation when using a precomputed inverse metric set -adapt_engaged=FALSE.

    adapt_delta

    (real in (0,1)) The adaptation target acceptance -statistic.

    step_size

    (positive real) The initial step size for the discrete +adapt_engaged=FALSE.

    + + +
    adapt_delta
    +

    (real in (0,1)) The adaptation target acceptance +statistic.

    + + +
    step_size
    +

    (positive real) The initial step size for the discrete approximation to continuous Hamiltonian dynamics. This is further tuned -during warmup.

    metric

    (string) One of "diag_e", "dense_e", or "unit_e", +during warmup.

    + + +
    metric
    +

    (string) One of "diag_e", "dense_e", or "unit_e", specifying the geometry of the base manifold. See the Euclidean Metric section of the CmdStan User's Guide for more details. To specify a -precomputed (inverse) metric, see the inv_metric argument below.

    metric_file

    (character vector) The paths to JSON or +precomputed (inverse) metric, see the inv_metric argument below.

    + + +
    metric_file
    +

    (character vector) The paths to JSON or Rdump files (one per chain) compatible with CmdStan that contain precomputed inverse metrics. The metric_file argument is inherited from CmdStan but is confusing in that the entry in JSON or Rdump file(s) must be named inv_metric, referring to the inverse metric. We recommend instead using CmdStanR's inv_metric argument (see below) to specify an inverse -metric directly using a vector or matrix from your R session.

    inv_metric

    (vector, matrix) A vector (if metric='diag_e') or a +metric directly using a vector or matrix from your R session.

    + + +
    inv_metric
    +

    (vector, matrix) A vector (if metric='diag_e') or a matrix (if metric='dense_e') for initializing the inverse metric. This can be used as an alternative to the metric_file argument. A vector is interpreted as a diagonal metric. The inverse metric is usually set to an estimate of the posterior covariance. See the adapt_engaged argument above for details about (and control over) how specifying a precomputed -inverse metric interacts with adaptation.

    init_buffer

    (nonnegative integer) Width of initial fast timestep -adaptation interval during warmup.

    term_buffer

    (nonnegative integer) Width of final fast timestep -adaptation interval during warmup.

    window

    (nonnegative integer) Initial width of slow timestep/metric -adaptation interval.

    fixed_param

    (logical) When TRUE, call CmdStan with argument +inverse metric interacts with adaptation.

    + + +
    init_buffer
    +

    (nonnegative integer) Width of initial fast timestep +adaptation interval during warmup.

    + + +
    term_buffer
    +

    (nonnegative integer) Width of final fast timestep +adaptation interval during warmup.

    + + +
    window
    +

    (nonnegative integer) Initial width of slow timestep/metric +adaptation interval.

    + + +
    fixed_param
    +

    (logical) When TRUE, call CmdStan with argument "algorithm=fixed_param". The default is FALSE. The fixed parameter sampler generates a new sample without changing the current state of the Markov chain; only generated quantities may change. This can be useful when, for example, trying to generate pseudo-data using the generated quantities block. If the parameters block is empty then using fixed_param=TRUE is mandatory. When fixed_param=TRUE the chains and -parallel_chains arguments will be set to 1.

    sig_figs

    (positive integer) The number of significant figures used +parallel_chains arguments will be set to 1.

    + + +
    sig_figs
    +

    (positive integer) The number of significant figures used when storing the output values. By default, CmdStan represents the output values with 6 significant figures. The upper limit for sig_figs is 18. Increasing this value will result in larger output CSV files and thus an -increased usage of disk space.

    show_messages

    (logical) When TRUE (the default), prints all +increased usage of disk space.

    + + +
    show_messages
    +

    (logical) When TRUE (the default), prints all +output during the sampling process, such as iteration numbers and elapsed times. +If the output is silenced then the $output() method of +the resulting fit object can be used to display the silenced messages.

    + + +
    show_exceptions
    +

    (logical) When TRUE (the default), prints all informational messages, for example rejection of the current proposal. Disable if you wish to silence these messages, but this is not usually recommended unless you are very confident that the model is correct up to numerical error. If the messages are silenced then the -$output() method of the resulting fit object can be -used to display the silenced messages.

    diagnostics

    (character vector) The diagnostics to automatically check +$output() method of the resulting fit object can be +used to display the silenced messages.

    + + +
    diagnostics
    +

    (character vector) The diagnostics to automatically check and warn about after sampling. Setting this to an empty string "" or NULL can be used to prevent CmdStanR from automatically reading in the sampler diagnostics from CSV if you wish to manually read in the results -and validate them yourself, for example using read_cmdstan_csv(). The +and validate them yourself, for example using read_cmdstan_csv(). The currently available diagnostics are "divergences", "treedepth", and "ebfmi" (the default is to check all of them).

    These diagnostics are also available after fitting. The -$sampler_diagnostics() method provides +$sampler_diagnostics() method provides access to the diagnostic values for each iteration and the -$diagnostic_summary() method provides +$diagnostic_summary() method provides summaries of the diagnostics and can regenerate the warning messages.

    Diagnostics like R-hat and effective sample size are not currently available via the diagnostics argument but can be checked after fitting -using the $summary() method.

    validate_csv

    Deprecated and will be removed in a future release.

    +using the $summary() method.

    -

    Value

    -

    A CmdStanMCMC object.

    -

    See also

    +
    validate_csv
    +

    Deprecated. Use diagnostics instead.

    -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online -documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    +
    +
    +

    Value

    + -

    The Stan Math Library's MPI documentation -(mc-stan.org/math/mpi) for more +

    A CmdStanMCMC object.

    +
    +
    +

    See also

    +

    The CmdStanR website +(mc-stan.org/cmdstanr) for online +documentation and tutorials.

    +

    The Stan and CmdStan documentation:

    The Stan Math Library's MPI documentation +(mc-stan.org/math/mpi) for more details on MPI support in Stan.

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-compile, -model-method-diagnose, -model-method-format, -model-method-generate-quantities, -model-method-optimize, -model-method-sample, -model-method-variables, -model-method-variational

    - -

    Examples

    -
    # \dontrun{ -# mpi_options <- list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc") -# mod <- cmdstan_model("model.stan", cpp_options = mpi_options) -# fit <- mod$sample_mpi(..., mpi_args = list("n" = 4)) -# } - -
    +model-method-check_syntax, +model-method-compile, +model-method-diagnose, +model-method-expose_functions, +model-method-format, +model-method-generate-quantities, +model-method-optimize, +model-method-sample, +model-method-variables, +model-method-variational

    +
    + +
    +

    Examples

    +
    # \dontrun{
    +# mpi_options <- list(STAN_MPI=TRUE, CXX="mpicxx", TBB_CXX_TYPE="gcc")
    +# mod <- cmdstan_model("model.stan", cpp_options = mpi_options)
    +# fit <- mod$sample_mpi(..., mpi_args = list("n" = 4))
    +# }
    +
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/model-method-variables.html b/docs/reference/model-method-variables.html index bf4a9d265..f26828ae3 100644 --- a/docs/reference/model-method-variables.html +++ b/docs/reference/model-method-variables.html @@ -1,82 +1,19 @@ - - - - - - - -Input and output variables of a Stan program — model-method-variables • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Input and output variables of a Stan program — model-method-variables • cmdstanr - - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +
    -

    The $variables() method of a CmdStanModel object returns +

    The $variables() method of a CmdStanModel object returns a list, each element representing a Stan model block: data, parameters, transformed_parameters and generated_quantities.

    Each element contains a list of variables, with each variable represented @@ -193,99 +121,104 @@

    Input and output variables of a Stan program

    part of the model's input or output.

    -
    variables()
    - +
    +
    variables()
    +
    -

    Value

    +
    +

    Value

    + -

    The $variables() returns a list with information on input and +

    The $variables() returns a list with information on input and output variables for each of the Stan model blocks.

    -

    See also

    - - - -

    Examples

    -
    # \dontrun{ -file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan") - -# create a `CmdStanModel` object, compiling the model is not required -mod <- cmdstan_model(file, compile = FALSE) - -mod$variables() -
    #> $parameters -#> $parameters$theta -#> $parameters$theta$type -#> [1] "real" -#> -#> $parameters$theta$dimensions -#> [1] 0 -#> -#> -#> -#> $included_files -#> list() -#> -#> $data -#> $data$N -#> $data$N$type -#> [1] "int" -#> -#> $data$N$dimensions -#> [1] 0 -#> -#> -#> $data$y -#> $data$y$type -#> [1] "int" -#> -#> $data$y$dimensions -#> [1] 1 -#> -#> -#> -#> $transformed_parameters -#> named list() -#> -#> $generated_quantities -#> named list() -#>
    -# } +
    + -
    +
    +

    Examples

    +
    # \dontrun{
    +file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan")
    +
    +# create a `CmdStanModel` object, compiling the model is not required
    +mod <- cmdstan_model(file, compile = FALSE)
    +
    +mod$variables()
    +#> $parameters
    +#> $parameters$theta
    +#> $parameters$theta$type
    +#> [1] "real"
    +#> 
    +#> $parameters$theta$dimensions
    +#> [1] 0
    +#> 
    +#> 
    +#> 
    +#> $included_files
    +#> list()
    +#> 
    +#> $data
    +#> $data$N
    +#> $data$N$type
    +#> [1] "int"
    +#> 
    +#> $data$N$dimensions
    +#> [1] 0
    +#> 
    +#> 
    +#> $data$y
    +#> $data$y$type
    +#> [1] "int"
    +#> 
    +#> $data$y$dimensions
    +#> [1] 1
    +#> 
    +#> 
    +#> 
    +#> $transformed_parameters
    +#> named list()
    +#> 
    +#> $generated_quantities
    +#> named list()
    +#> 
    +
    +# }
    +
    +
    +
    +
    -
    - - + + diff --git a/docs/reference/model-method-variational-1.png b/docs/reference/model-method-variational-1.png index d7b8b97f8..27fd88217 100644 Binary files a/docs/reference/model-method-variational-1.png and b/docs/reference/model-method-variational-1.png differ diff --git a/docs/reference/model-method-variational-2.png b/docs/reference/model-method-variational-2.png index 093449f41..4ed9fe5c5 100644 Binary files a/docs/reference/model-method-variational-2.png and b/docs/reference/model-method-variational-2.png differ diff --git a/docs/reference/model-method-variational.html b/docs/reference/model-method-variational.html index ce353cf1f..e5fdeb91e 100644 --- a/docs/reference/model-method-variational.html +++ b/docs/reference/model-method-variational.html @@ -1,80 +1,17 @@ - - - - - - - -Run Stan's variational approximation algorithms — model-method-variational • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Run Stan's variational approximation algorithms — model-method-variational • cmdstanr - - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +
    -

    The $variational() method of a CmdStanModel object runs +

    The $variational() method of a CmdStanModel object runs Stan's variational Bayes (ADVI) algorithms.

    Any argument left as NULL will default to the default value used by the installed version of CmdStan. See the -CmdStan User’s Guide +CmdStan User’s Guide for more details.

    -
    variational(
    -  data = NULL,
    -  seed = NULL,
    -  refresh = NULL,
    -  init = NULL,
    -  save_latent_dynamics = FALSE,
    -  output_dir = NULL,
    -  output_basename = NULL,
    -  sig_figs = NULL,
    -  threads = NULL,
    -  opencl_ids = NULL,
    -  algorithm = NULL,
    -  iter = NULL,
    -  grad_samples = NULL,
    -  elbo_samples = NULL,
    -  eta = NULL,
    -  adapt_engaged = NULL,
    -  adapt_iter = NULL,
    -  tol_rel_obj = NULL,
    -  eval_elbo = NULL,
    -  output_samples = NULL
    -)
    - -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
    data

    (multiple options) The data to use for the variables specified in -the data block of the Stan program. One of the following:

      -
    • A named list of R objects with the names corresponding to variables +

      +
      variational(
      +  data = NULL,
      +  seed = NULL,
      +  refresh = NULL,
      +  init = NULL,
      +  save_latent_dynamics = FALSE,
      +  output_dir = NULL,
      +  output_basename = NULL,
      +  sig_figs = NULL,
      +  threads = NULL,
      +  opencl_ids = NULL,
      +  algorithm = NULL,
      +  iter = NULL,
      +  grad_samples = NULL,
      +  elbo_samples = NULL,
      +  eta = NULL,
      +  adapt_engaged = NULL,
      +  adapt_iter = NULL,
      +  tol_rel_obj = NULL,
      +  eval_elbo = NULL,
      +  output_samples = NULL
      +)
      +
      + +
      +

      Arguments

      +
      data
      +

      (multiple options) The data to use for the variables specified in +the data block of the Stan program. One of the following:

      • A named list of R objects with the names corresponding to variables declared in the data block of the Stan program. Internally this list is then -written to JSON for CmdStan using write_stan_json(). See -write_stan_json() for details on the conversions performed on R objects +written to JSON for CmdStan using write_stan_json(). See +write_stan_json() for details on the conversions performed on R objects before they are passed to Stan.

      • A path to a data file compatible with CmdStan (JSON or R dump). See the appendices in the CmdStan guide for details on using these formats.

      • NULL or an empty list if the Stan program has no data block.

      • -
    seed

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. + + + +

    seed
    +

    (positive integer(s)) A seed for the (P)RNG to pass to CmdStan. In the case of multi-chain sampling the single seed will automatically be augmented by the run (chain) ID so that each chain uses a different seed. The exception is the transformed data block, which defaults to @@ -239,25 +166,24 @@

    Arg chains if RNG functions are used. The only time seed should be specified as a vector (one element per chain) is if RNG functions are used in transformed data and the goal is to generate different data for each -chain.

    refresh

    (non-negative integer) The number of iterations between +chain.

    + + +
    refresh
    +

    (non-negative integer) The number of iterations between printed screen updates. If refresh = 0, only error messages will be -printed.

    init

    (multiple options) The initialization method to use for the +printed.

    + + +
    init
    +

    (multiple options) The initialization method to use for the variables declared in the parameters block of the Stan program. One of -the following:

      -
    • A real number x>0. This initializes all parameters randomly between +the following:

      • A real number x>0. This initializes all parameters randomly between [-x,x] on the unconstrained parameter space;

      • The number 0. This initializes all parameters to 0;

      • A character vector of paths (one per chain) to JSON or Rdump files containing initial values for all or some parameters. See -write_stan_json() to write R objects to JSON files compatible with +write_stan_json() to write R objects to JSON files compatible with CmdStan.

      • A list of lists containing initial values for all or some parameters. For MCMC the list should contain a sublist for each chain. For optimization and @@ -270,500 +196,521 @@

        Arg has argument chain_id it will be supplied with the chain id (from 1 to number of chains) when called to generate the initial values. See Examples.

      • -
    save_latent_dynamics

    (logical) Should auxiliary diagnostic information + + + +

    save_latent_dynamics
    +

    (logical) Should auxiliary diagnostic information about the latent dynamics be written to temporary diagnostic CSV files? This argument replaces CmdStan's diagnostic_file argument and the content written to CSV is controlled by the user's CmdStan installation and not CmdStanR (for some algorithms no content may be written). The default is FALSE, which is appropriate for almost every use case. To save the temporary files created when save_latent_dynamics=TRUE see the -$save_latent_dynamics_files() -method.

    output_dir

    (string) A path to a directory where CmdStan should write +$save_latent_dynamics_files() +method.

    + + +
    output_dir
    +

    (string) A path to a directory where CmdStan should write its output CSV files. For interactive use this can typically be left at NULL (temporary directory) since CmdStanR makes the CmdStan output (posterior draws and diagnostics) available in R via methods of the fitted -model objects. The behavior of output_dir is as follows:

      -
    • If NULL (the default), then the CSV files are written to a temporary +model objects. The behavior of output_dir is as follows:

      • If NULL (the default), then the CSV files are written to a temporary directory and only saved permanently if the user calls one of the $save_* methods of the fitted model object (e.g., -$save_output_files()). These temporary +$save_output_files()). These temporary files are removed when the fitted model object is -garbage collected (manually or automatically).

      • +garbage collected (manually or automatically).

      • If a path, then the files are created in output_dir with names corresponding to the defaults used by $save_output_files().

      • -
    output_basename

    (string) A string to use as a prefix for the names of + + + +

    output_basename
    +

    (string) A string to use as a prefix for the names of the output CSV files of CmdStan. If NULL (the default), the basename of the output CSV files will be composed of the model name, timestamp, and -5 random characters.

    sig_figs

    (positive integer) The number of significant figures used +5 random characters.

    + + +
    sig_figs
    +

    (positive integer) The number of significant figures used when storing the output values. By default, CmdStan represents the output values with 6 significant figures. The upper limit for sig_figs is 18. Increasing this value will result in larger output CSV files and thus an -increased usage of disk space.

    threads

    (positive integer) If the model was -compiled with threading support, the number of +increased usage of disk space.

    + + +
    threads
    +

    (positive integer) If the model was +compiled with threading support, the number of threads to use in parallelized sections (e.g., when using the Stan -functions reduce_sum() or map_rect()).

    opencl_ids

    (integer vector of length 2) The platform and +functions reduce_sum() or map_rect()).

    + + +
    opencl_ids
    +

    (integer vector of length 2) The platform and device IDs of the OpenCL device to use for fitting. The model must be compiled with cpp_options = list(stan_opencl = TRUE) for this -argument to have an effect.

    algorithm

    (string) The algorithm. Either "meanfield" or -"fullrank".

    iter

    (positive integer) The maximum number of iterations.

    grad_samples

    (positive integer) The number of samples for Monte Carlo -estimate of gradients.

    elbo_samples

    (positive integer) The number of samples for Monte Carlo -estimate of ELBO (objective function).

    eta

    (positive real) The step size weighting parameter for adaptive -step size sequence.

    adapt_engaged

    (logical) Do warmup adaptation?

    adapt_iter

    (positive integer) The maximum number of adaptation -iterations.

    tol_rel_obj

    (positive real) Convergence tolerance on the relative norm -of the objective.

    eval_elbo

    (positive integer) Evaluate ELBO every Nth iteration.

    output_samples

    (positive integer) Number of approximate posterior -samples to draw and save.

    - -

    Value

    - -

    A CmdStanVB object.

    -

    Details

    +argument to have an effect.

    + + +
    algorithm
    +

    (string) The algorithm. Either "meanfield" or +"fullrank".

    + + +
    iter
    +

    (positive integer) The maximum number of iterations.

    + + +
    grad_samples
    +

    (positive integer) The number of samples for Monte Carlo +estimate of gradients.

    + + +
    elbo_samples
    +

    (positive integer) The number of samples for Monte Carlo +estimate of ELBO (objective function).

    + + +
    eta
    +

    (positive real) The step size weighting parameter for adaptive +step size sequence.

    + +
    adapt_engaged
    +

    (logical) Do warmup adaptation?

    + + +
    adapt_iter
    +

    (positive integer) The maximum number of adaptation +iterations.

    + + +
    tol_rel_obj
    +

    (positive real) Convergence tolerance on the relative norm +of the objective.

    + + +
    eval_elbo
    +

    (positive integer) Evaluate ELBO every Nth iteration.

    + + +
    output_samples
    +

    (positive integer) Number of approximate posterior +samples to draw and save.

    + +
    +
    +

    Value

    + + +

    A CmdStanVB object.

    +
    +
    +

    Details

    CmdStan can fit a variational approximation to the posterior. The approximation is a Gaussian in the unconstrained variable space. Stan implements two variational algorithms. The algorithm="meanfield" option uses a fully factorized Gaussian for the approximation. The algorithm="fullrank" option uses a Gaussian with a full-rank covariance matrix for the approximation.

    -

    -- CmdStan Interface User's Guide

    -

    See also

    - -

    The CmdStanR website -(mc-stan.org/cmdstanr) for online +

    -- CmdStan Interface User's Guide

    +
    +
    +

    See also

    +

    The CmdStanR website +(mc-stan.org/cmdstanr) for online documentation and tutorials.

    -

    The Stan and CmdStan documentation:

    - -

    Other CmdStanModel methods: -model-method-check_syntax, -model-method-compile, -model-method-diagnose, -model-method-format, -model-method-generate-quantities, -model-method-optimize, -model-method-sample_mpi, -model-method-sample, -model-method-variables

    - -

    Examples

    -
    # \dontrun{ -library(cmdstanr) -library(posterior) -library(bayesplot) -color_scheme_set("brightblue") - -# Set path to CmdStan -# (Note: if you installed CmdStan via install_cmdstan() with default settings -# then setting the path is unnecessary but the default below should still work. -# Otherwise use the `path` argument to specify the location of your -# CmdStan installation.) -set_cmdstan_path(path = NULL) -
    #> CmdStan path set to: /Users/jgabry/.cmdstan/cmdstan-2.29.1
    -# Create a CmdStanModel object from a Stan program, -# here using the example model that comes with CmdStan -file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan") -mod <- cmdstan_model(file) -mod$print() -
    #> data { -#> int<lower=0> N; -#> array[N] int<lower=0,upper=1> y; // or int<lower=0,upper=1> y[N]; -#> } -#> parameters { -#> real<lower=0,upper=1> theta; -#> } -#> model { -#> theta ~ beta(1,1); // uniform prior on interval 0,1 -#> y ~ bernoulli(theta); -#> }
    -# Data as a named list (like RStan) -stan_data <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1)) - -# Run MCMC using the 'sample' method -fit_mcmc <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - parallel_chains = 2 -) -
    #> Running MCMC with 2 parallel chains... -#> -#> Chain 1 Iteration: 1 / 2000 [ 0%] (Warmup) -#> Chain 1 Iteration: 100 / 2000 [ 5%] (Warmup) -#> Chain 1 Iteration: 200 / 2000 [ 10%] (Warmup) -#> Chain 1 Iteration: 300 / 2000 [ 15%] (Warmup) -#> Chain 1 Iteration: 400 / 2000 [ 20%] (Warmup) -#> Chain 1 Iteration: 500 / 2000 [ 25%] (Warmup) -#> Chain 1 Iteration: 600 / 2000 [ 30%] (Warmup) -#> Chain 1 Iteration: 700 / 2000 [ 35%] (Warmup) -#> Chain 1 Iteration: 800 / 2000 [ 40%] (Warmup) -#> Chain 1 Iteration: 900 / 2000 [ 45%] (Warmup) -#> Chain 1 Iteration: 1000 / 2000 [ 50%] (Warmup) -#> Chain 1 Iteration: 1001 / 2000 [ 50%] (Sampling) -#> Chain 1 Iteration: 1100 / 2000 [ 55%] (Sampling) -#> Chain 1 Iteration: 1200 / 2000 [ 60%] (Sampling) -#> Chain 1 Iteration: 1300 / 2000 [ 65%] (Sampling) -#> Chain 1 Iteration: 1400 / 2000 [ 70%] (Sampling) -#> Chain 1 Iteration: 1500 / 2000 [ 75%] (Sampling) -#> Chain 1 Iteration: 1600 / 2000 [ 80%] (Sampling) -#> Chain 1 Iteration: 1700 / 2000 [ 85%] (Sampling) -#> Chain 1 Iteration: 1800 / 2000 [ 90%] (Sampling) -#> Chain 1 Iteration: 1900 / 2000 [ 95%] (Sampling) -#> Chain 1 Iteration: 2000 / 2000 [100%] (Sampling) -#> Chain 2 Iteration: 1 / 2000 [ 0%] (Warmup) -#> Chain 2 Iteration: 100 / 2000 [ 5%] (Warmup) -#> Chain 2 Iteration: 200 / 2000 [ 10%] (Warmup) -#> Chain 2 Iteration: 300 / 2000 [ 15%] (Warmup) -#> Chain 2 Iteration: 400 / 2000 [ 20%] (Warmup) -#> Chain 2 Iteration: 500 / 2000 [ 25%] (Warmup) -#> Chain 2 Iteration: 600 / 2000 [ 30%] (Warmup) -#> Chain 2 Iteration: 700 / 2000 [ 35%] (Warmup) -#> Chain 2 Iteration: 800 / 2000 [ 40%] (Warmup) -#> Chain 2 Iteration: 900 / 2000 [ 45%] (Warmup) -#> Chain 2 Iteration: 1000 / 2000 [ 50%] (Warmup) -#> Chain 2 Iteration: 1001 / 2000 [ 50%] (Sampling) -#> Chain 2 Iteration: 1100 / 2000 [ 55%] (Sampling) -#> Chain 2 Iteration: 1200 / 2000 [ 60%] (Sampling) -#> Chain 2 Iteration: 1300 / 2000 [ 65%] (Sampling) -#> Chain 2 Iteration: 1400 / 2000 [ 70%] (Sampling) -#> 
Chain 2 Iteration: 1500 / 2000 [ 75%] (Sampling) -#> Chain 2 Iteration: 1600 / 2000 [ 80%] (Sampling) -#> Chain 2 Iteration: 1700 / 2000 [ 85%] (Sampling) -#> Chain 2 Iteration: 1800 / 2000 [ 90%] (Sampling) -#> Chain 2 Iteration: 1900 / 2000 [ 95%] (Sampling) -#> Chain 2 Iteration: 2000 / 2000 [100%] (Sampling) -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.2 seconds. -#>
    -# Use 'posterior' package for summaries -fit_mcmc$summary() -
    #> # A tibble: 2 × 10 -#> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -7.30 -7.03 0.721 0.380 -8.82 -6.75 1.00 902. 1006. -#> 2 theta 0.247 0.233 0.122 0.129 0.0786 0.470 1.00 762. 712.
    -# Get posterior draws -draws <- fit_mcmc$draws() -print(draws) -
    #> # A draws_array: 1000 iterations, 2 chains, and 2 variables -#> , , variable = lp__ -#> -#> chain -#> iteration 1 2 -#> 1 -6.8 -6.8 -#> 2 -6.9 -6.8 -#> 3 -7.0 -7.0 -#> 4 -6.9 -7.1 -#> 5 -6.7 -7.0 -#> -#> , , variable = theta -#> -#> chain -#> iteration 1 2 -#> 1 0.28 0.21 -#> 2 0.19 0.20 -#> 3 0.16 0.17 -#> 4 0.20 0.36 -#> 5 0.25 0.34 -#> -#> # ... with 995 more iterations
    -# Convert to data frame using posterior::as_draws_df -as_draws_df(draws) -
    #> # A draws_df: 1000 iterations, 2 chains, and 2 variables -#> lp__ theta -#> 1 -6.8 0.28 -#> 2 -6.9 0.19 -#> 3 -7.0 0.16 -#> 4 -6.9 0.20 -#> 5 -6.7 0.25 -#> 6 -7.1 0.36 -#> 7 -9.0 0.55 -#> 8 -7.2 0.15 -#> 9 -6.8 0.23 -#> 10 -7.5 0.42 -#> # ... with 1990 more draws -#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    -# Plot posterior using bayesplot (ggplot2) -mcmc_hist(fit_mcmc$draws("theta")) -
    #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    -# Call CmdStan's diagnose and stansummary utilities -fit_mcmc$cmdstan_diagnose() -
    #> Processing csv files: /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/bernoulli-202203181227-1-1c6def.csv, /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/bernoulli-202203181227-2-1c6def.csv -#> -#> Checking sampler transitions treedepth. -#> Treedepth satisfactory for all transitions. -#> -#> Checking sampler transitions for divergences. -#> No divergent transitions found. -#> -#> Checking E-BFMI - sampler transitions HMC potential energy. -#> E-BFMI satisfactory. -#> -#> Effective sample size satisfactory. -#> -#> Split R-hat values satisfactory all parameters. -#> -#> Processing complete, no problems detected.
    fit_mcmc$cmdstan_summary() -
    #> Inference for Stan model: bernoulli_model -#> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved. -#> -#> Warmup took (0.0050, 0.0050) seconds, 0.010 seconds total -#> Sampling took (0.019, 0.016) seconds, 0.035 seconds total -#> -#> Mean MCSE StdDev 5% 50% 95% N_Eff N_Eff/s R_hat -#> -#> lp__ -7.3 2.6e-02 0.72 -8.8 -7.0 -6.8 781 22315 1.0 -#> accept_stat__ 0.92 8.3e-03 0.13 0.64 0.97 1.0 2.3e+02 6.7e+03 1.0e+00 -#> stepsize__ 0.95 7.9e-02 0.079 0.87 1.0 1.0 1.0e+00 2.9e+01 2.0e+13 -#> treedepth__ 1.4 1.1e-02 0.48 1.0 1.0 2.0 1.9e+03 5.4e+04 1.0e+00 -#> n_leapfrog__ 2.5 1.4e-01 1.3 1.0 3.0 3.0 8.9e+01 2.5e+03 1.0e+00 -#> divergent__ 0.00 nan 0.00 0.00 0.00 0.00 nan nan nan -#> energy__ 7.8 3.6e-02 1.00 6.8 7.5 9.6 7.7e+02 2.2e+04 1.0e+00 -#> -#> theta 0.25 4.3e-03 0.12 0.079 0.23 0.47 796 22752 1.0 -#> -#> Samples were drawn using hmc with nuts. -#> For each parameter, N_Eff is a crude measure of effective sample size, -#> and R_hat is the potential scale reduction factor on split chains (at -#> convergence, R_hat=1).
    -# For models fit using MCMC, if you like working with RStan's stanfit objects -# then you can create one with rstan::read_stan_csv() - -# stanfit <- rstan::read_stan_csv(fit_mcmc$output_files()) - - -# Run 'optimize' method to get a point estimate (default is Stan's LBFGS algorithm) -# and also demonstrate specifying data as a path to a file instead of a list -my_data_file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.data.json") -fit_optim <- mod$optimize(data = my_data_file, seed = 123) -
    #> Initial log joint probability = -9.51104 -#> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 6 -5.00402 0.000103557 2.55661e-07 1 1 9 -#> Optimization terminated normally: -#> Convergence detected: relative gradient magnitude is below tolerance -#> Finished in 0.1 seconds.
    -fit_optim$summary() -
    #> # A tibble: 2 × 2 -#> variable estimate -#> <chr> <dbl> -#> 1 lp__ -5.00 -#> 2 theta 0.2
    - -# Run 'variational' method to approximate the posterior (default is meanfield ADVI) -fit_vb <- mod$variational(data = stan_data, seed = 123) -
    #> ------------------------------------------------------------ -#> EXPERIMENTAL ALGORITHM: -#> This procedure has not been thoroughly tested and may be unstable -#> or buggy. The interface is subject to change. -#> ------------------------------------------------------------ -#> Gradient evaluation took 9e-06 seconds -#> 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds. -#> Adjust your expectations accordingly! -#> Begin eta adaptation. -#> Iteration: 1 / 250 [ 0%] (Adaptation) -#> Iteration: 50 / 250 [ 20%] (Adaptation) -#> Iteration: 100 / 250 [ 40%] (Adaptation) -#> Iteration: 150 / 250 [ 60%] (Adaptation) -#> Iteration: 200 / 250 [ 80%] (Adaptation) -#> Success! Found best value [eta = 1] earlier than expected. -#> Begin stochastic gradient ascent. -#> iter ELBO delta_ELBO_mean delta_ELBO_med notes -#> 100 -6.262 1.000 1.000 -#> 200 -6.263 0.500 1.000 -#> 300 -6.307 0.336 0.007 MEDIAN ELBO CONVERGED -#> Drawing a sample of size 1000 from the approximate posterior... -#> COMPLETED. -#> Finished in 0.1 seconds.
    -fit_vb$summary() -
    #> # A tibble: 3 × 7 -#> variable mean median sd mad q5 q95 -#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> -#> 1 lp__ -7.18 -6.94 0.588 0.259 -8.36 -6.75 -#> 2 lp_approx__ -0.515 -0.221 0.692 0.303 -2.06 -0.00257 -#> 3 theta 0.263 0.246 0.115 0.113 0.106 0.481
    -# Plot approximate posterior using bayesplot -mcmc_hist(fit_vb$draws("theta")) -
    #> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    - -# Specifying initial values as a function -fit_mcmc_w_init_fun <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = function() list(theta = runif(1)) -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_mcmc_w_init_fun_2 <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = function(chain_id) { - # silly but demonstrates optional use of chain_id - list(theta = 1 / (chain_id + 1)) - } -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_mcmc_w_init_fun_2$init() -
    #> [[1]] -#> [[1]]$theta -#> [1] 0.5 -#> -#> -#> [[2]] -#> [[2]]$theta -#> [1] 0.3333333 -#> -#>
    -# Specifying initial values as a list of lists -fit_mcmc_w_init_list <- mod$sample( - data = stan_data, - seed = 123, - chains = 2, - refresh = 0, - init = list( - list(theta = 0.75), # chain 1 - list(theta = 0.25) # chain 2 - ) -) -
    #> Running MCMC with 2 sequential chains... -#> -#> Chain 1 finished in 0.0 seconds. -#> Chain 2 finished in 0.0 seconds. -#> -#> Both chains finished successfully. -#> Mean chain execution time: 0.0 seconds. -#> Total execution time: 0.3 seconds. -#>
    fit_optim_w_init_list <- mod$optimize( - data = stan_data, - seed = 123, - init = list( - list(theta = 0.75) - ) -) -
    #> Initial log joint probability = -11.6657 -#> Iter log prob ||dx|| ||grad|| alpha alpha0 # evals Notes -#> 6 -5.00402 0.000237915 9.55309e-07 1 1 9 -#> Optimization terminated normally: -#> Convergence detected: relative gradient magnitude is below tolerance -#> Finished in 0.1 seconds.
    fit_optim_w_init_list$init() -
    #> [[1]] -#> [[1]]$theta -#> [1] 0.75 -#> -#>
    # } - -
    +

    The Stan and CmdStan documentation:

    Other CmdStanModel methods: +model-method-check_syntax, +model-method-compile, +model-method-diagnose, +model-method-expose_functions, +model-method-format, +model-method-generate-quantities, +model-method-optimize, +model-method-sample_mpi, +model-method-sample, +model-method-variables

    +
    + +
    +

    Examples

    +
    # \dontrun{
    +library(cmdstanr)
    +library(posterior)
    +library(bayesplot)
    +color_scheme_set("brightblue")
    +
    +# Set path to CmdStan
    +# (Note: if you installed CmdStan via install_cmdstan() with default settings
    +# then setting the path is unnecessary but the default below should still work.
    +# Otherwise use the `path` argument to specify the location of your
    +# CmdStan installation.)
    +set_cmdstan_path(path = NULL)
    +#> CmdStan path set to: /Users/jgabry/.cmdstan/cmdstan-2.32.2
    +
    +# Create a CmdStanModel object from a Stan program,
    +# here using the example model that comes with CmdStan
    +file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.stan")
    +mod <- cmdstan_model(file)
    +mod$print()
    +#> data {
    +#>   int<lower=0> N;
    +#>   array[N] int<lower=0,upper=1> y;
    +#> }
    +#> parameters {
    +#>   real<lower=0,upper=1> theta;
    +#> }
    +#> model {
    +#>   theta ~ beta(1,1);  // uniform prior on interval 0,1
    +#>   y ~ bernoulli(theta);
    +#> }
    +
    +# Data as a named list (like RStan)
    +stan_data <- list(N = 10, y = c(0,1,0,0,0,0,0,0,0,1))
    +
    +# Run MCMC using the 'sample' method
    +fit_mcmc <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  parallel_chains = 2
    +)
    +#> Running MCMC with 2 parallel chains...
    +#> 
    +#> Chain 1 Iteration:    1 / 2000 [  0%]  (Warmup) 
    +#> Chain 1 Iteration:  100 / 2000 [  5%]  (Warmup) 
    +#> Chain 1 Iteration:  200 / 2000 [ 10%]  (Warmup) 
    +#> Chain 1 Iteration:  300 / 2000 [ 15%]  (Warmup) 
    +#> Chain 1 Iteration:  400 / 2000 [ 20%]  (Warmup) 
    +#> Chain 1 Iteration:  500 / 2000 [ 25%]  (Warmup) 
    +#> Chain 1 Iteration:  600 / 2000 [ 30%]  (Warmup) 
    +#> Chain 1 Iteration:  700 / 2000 [ 35%]  (Warmup) 
    +#> Chain 1 Iteration:  800 / 2000 [ 40%]  (Warmup) 
    +#> Chain 1 Iteration:  900 / 2000 [ 45%]  (Warmup) 
    +#> Chain 1 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
    +#> Chain 1 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
    +#> Chain 1 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
    +#> Chain 1 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
    +#> Chain 1 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
    +#> Chain 1 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
    +#> Chain 1 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
    +#> Chain 1 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
    +#> Chain 1 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
    +#> Chain 1 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
    +#> Chain 1 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
    +#> Chain 1 Iteration: 2000 / 2000 [100%]  (Sampling) 
    +#> Chain 2 Iteration:    1 / 2000 [  0%]  (Warmup) 
    +#> Chain 2 Iteration:  100 / 2000 [  5%]  (Warmup) 
    +#> Chain 2 Iteration:  200 / 2000 [ 10%]  (Warmup) 
    +#> Chain 2 Iteration:  300 / 2000 [ 15%]  (Warmup) 
    +#> Chain 2 Iteration:  400 / 2000 [ 20%]  (Warmup) 
    +#> Chain 2 Iteration:  500 / 2000 [ 25%]  (Warmup) 
    +#> Chain 2 Iteration:  600 / 2000 [ 30%]  (Warmup) 
    +#> Chain 2 Iteration:  700 / 2000 [ 35%]  (Warmup) 
    +#> Chain 2 Iteration:  800 / 2000 [ 40%]  (Warmup) 
    +#> Chain 2 Iteration:  900 / 2000 [ 45%]  (Warmup) 
    +#> Chain 2 Iteration: 1000 / 2000 [ 50%]  (Warmup) 
    +#> Chain 2 Iteration: 1001 / 2000 [ 50%]  (Sampling) 
    +#> Chain 2 Iteration: 1100 / 2000 [ 55%]  (Sampling) 
    +#> Chain 2 Iteration: 1200 / 2000 [ 60%]  (Sampling) 
    +#> Chain 2 Iteration: 1300 / 2000 [ 65%]  (Sampling) 
    +#> Chain 2 Iteration: 1400 / 2000 [ 70%]  (Sampling) 
    +#> Chain 2 Iteration: 1500 / 2000 [ 75%]  (Sampling) 
    +#> Chain 2 Iteration: 1600 / 2000 [ 80%]  (Sampling) 
    +#> Chain 2 Iteration: 1700 / 2000 [ 85%]  (Sampling) 
    +#> Chain 2 Iteration: 1800 / 2000 [ 90%]  (Sampling) 
    +#> Chain 2 Iteration: 1900 / 2000 [ 95%]  (Sampling) 
    +#> Chain 2 Iteration: 2000 / 2000 [100%]  (Sampling) 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.2 seconds.
    +#> 
    +
    +# Use 'posterior' package for summaries
    +fit_mcmc$summary()
    +#> # A tibble: 2 × 10
    +#>   variable   mean median    sd   mad      q5    q95  rhat ess_bulk ess_tail
    +#>   <chr>     <num>  <num> <num> <num>   <num>  <num> <num>    <num>    <num>
    +#> 1 lp__     -7.30  -7.03  0.721 0.380 -8.82   -6.75   1.00     902.    1006.
    +#> 2 theta     0.247  0.233 0.122 0.129  0.0786  0.470  1.00     762.     712.
    +
    +# Get posterior draws
    +draws <- fit_mcmc$draws()
    +print(draws)
    +#> # A draws_array: 1000 iterations, 2 chains, and 2 variables
    +#> , , variable = lp__
    +#> 
    +#>          chain
    +#> iteration    1    2
    +#>         1 -6.8 -6.8
    +#>         2 -6.9 -6.8
    +#>         3 -7.0 -7.0
    +#>         4 -6.9 -7.1
    +#>         5 -6.7 -7.0
    +#> 
    +#> , , variable = theta
    +#> 
    +#>          chain
    +#> iteration    1    2
    +#>         1 0.28 0.21
    +#>         2 0.19 0.20
    +#>         3 0.16 0.17
    +#>         4 0.20 0.36
    +#>         5 0.25 0.34
    +#> 
    +#> # ... with 995 more iterations
    +
    +# Convert to data frame using posterior::as_draws_df
    +as_draws_df(draws)
    +#> # A draws_df: 1000 iterations, 2 chains, and 2 variables
    +#>    lp__ theta
    +#> 1  -6.8  0.28
    +#> 2  -6.9  0.19
    +#> 3  -7.0  0.16
    +#> 4  -6.9  0.20
    +#> 5  -6.7  0.25
    +#> 6  -7.1  0.36
    +#> 7  -9.0  0.55
    +#> 8  -7.2  0.15
    +#> 9  -6.8  0.23
    +#> 10 -7.5  0.42
    +#> # ... with 1990 more draws
    +#> # ... hidden reserved variables {'.chain', '.iteration', '.draw'}
    +
    +# Plot posterior using bayesplot (ggplot2)
    +mcmc_hist(fit_mcmc$draws("theta"))
    +#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    +
    +
    +# Call CmdStan's diagnose and stansummary utilities
    +fit_mcmc$cmdstan_diagnose()
    +#> Processing csv files: /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/bernoulli-202307251438-1-4ea737.csv, /var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/bernoulli-202307251438-2-4ea737.csv
    +#> 
    +#> Checking sampler transitions treedepth.
    +#> Treedepth satisfactory for all transitions.
    +#> 
    +#> Checking sampler transitions for divergences.
    +#> No divergent transitions found.
    +#> 
    +#> Checking E-BFMI - sampler transitions HMC potential energy.
    +#> E-BFMI satisfactory.
    +#> 
    +#> Effective sample size satisfactory.
    +#> 
    +#> Split R-hat values satisfactory all parameters.
    +#> 
    +#> Processing complete, no problems detected.
    +fit_mcmc$cmdstan_summary()
    +#> Inference for Stan model: bernoulli_model
    +#> 2 chains: each with iter=(1000,1000); warmup=(0,0); thin=(1,1); 2000 iterations saved.
    +#> 
    +#> Warmup took (0.0040, 0.0040) seconds, 0.0080 seconds total
    +#> Sampling took (0.011, 0.011) seconds, 0.022 seconds total
    +#> 
    +#>                 Mean     MCSE  StdDev     5%   50%   95%  N_Eff  N_Eff/s    R_hat
    +#> 
    +#> lp__            -7.3  2.6e-02    0.72   -8.8  -7.0  -6.8    781    35502      1.0
    +#> accept_stat__   0.92  8.3e-03    0.13   0.64  0.97   1.0    235    10662  1.0e+00
    +#> stepsize__      0.95  7.9e-02   0.079   0.87   1.0   1.0    1.0       46  2.0e+13
    +#> treedepth__      1.4  1.1e-02    0.48    1.0   1.0   2.0   1874    85179  1.0e+00
    +#> n_leapfrog__     2.5  1.4e-01     1.3    1.0   3.0   3.0     89     4050  1.0e+00
    +#> divergent__     0.00      nan    0.00   0.00  0.00  0.00    nan      nan      nan
    +#> energy__         7.8  3.6e-02    1.00    6.8   7.5   9.6    775    35215  1.0e+00
    +#> 
    +#> theta           0.25  4.3e-03    0.12  0.079  0.23  0.47    796    36197      1.0
    +#> 
    +#> Samples were drawn using hmc with nuts.
    +#> For each parameter, N_Eff is a crude measure of effective sample size,
    +#> and R_hat is the potential scale reduction factor on split chains (at 
    +#> convergence, R_hat=1).
    +
    +# For models fit using MCMC, if you like working with RStan's stanfit objects
    +# then you can create one with rstan::read_stan_csv()
    +
    +# stanfit <- rstan::read_stan_csv(fit_mcmc$output_files())
    +
    +
    +# Run 'optimize' method to get a point estimate (default is Stan's LBFGS algorithm)
    +# and also demonstrate specifying data as a path to a file instead of a list
    +my_data_file <- file.path(cmdstan_path(), "examples/bernoulli/bernoulli.data.json")
    +fit_optim <- mod$optimize(data = my_data_file, seed = 123)
    +#> Initial log joint probability = -9.51104 
    +#>     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes  
    +#>        6      -5.00402   0.000103557   2.55661e-07           1           1        9    
    +#> Optimization terminated normally:  
    +#>   Convergence detected: relative gradient magnitude is below tolerance 
    +#> Finished in  0.1 seconds.
    +
    +fit_optim$summary()
    +#> # A tibble: 2 × 2
    +#>   variable estimate
    +#>   <chr>       <num>
    +#> 1 lp__        -5.00
    +#> 2 theta        0.2 
    +
    +
    +# Run 'variational' method to approximate the posterior (default is meanfield ADVI)
    +fit_vb <- mod$variational(data = stan_data, seed = 123)
    +#> ------------------------------------------------------------ 
    +#> EXPERIMENTAL ALGORITHM: 
    +#>   This procedure has not been thoroughly tested and may be unstable 
    +#>   or buggy. The interface is subject to change. 
    +#> ------------------------------------------------------------ 
    +#> Gradient evaluation took 9e-06 seconds 
    +#> 1000 transitions using 10 leapfrog steps per transition would take 0.09 seconds. 
    +#> Adjust your expectations accordingly! 
    +#> Begin eta adaptation. 
    +#> Iteration:   1 / 250 [  0%]  (Adaptation) 
    +#> Iteration:  50 / 250 [ 20%]  (Adaptation) 
    +#> Iteration: 100 / 250 [ 40%]  (Adaptation) 
    +#> Iteration: 150 / 250 [ 60%]  (Adaptation) 
    +#> Iteration: 200 / 250 [ 80%]  (Adaptation) 
    +#> Success! Found best value [eta = 1] earlier than expected. 
    +#> Begin stochastic gradient ascent. 
    +#>   iter             ELBO   delta_ELBO_mean   delta_ELBO_med   notes  
    +#>    100           -6.262             1.000            1.000 
    +#>    200           -6.263             0.500            1.000 
    +#>    300           -6.307             0.336            0.007   MEDIAN ELBO CONVERGED 
    +#> Drawing a sample of size 1000 from the approximate posterior...  
    +#> COMPLETED. 
    +#> Finished in  0.1 seconds.
    +
    +fit_vb$summary()
    +#> # A tibble: 3 × 7
    +#>   variable      mean median    sd   mad     q5      q95
    +#>   <chr>        <num>  <num> <num> <num>  <num>    <num>
    +#> 1 lp__        -7.18  -6.94  0.588 0.259 -8.36  -6.75   
    +#> 2 lp_approx__ -0.515 -0.221 0.692 0.303 -2.06  -0.00257
    +#> 3 theta        0.263  0.246 0.115 0.113  0.106  0.481  
    +
    +# Plot approximate posterior using bayesplot
    +mcmc_hist(fit_vb$draws("theta"))
    +#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    +
    +
    +
    +# Specifying initial values as a function
    +fit_mcmc_w_init_fun <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = function() list(theta = runif(1))
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_mcmc_w_init_fun_2 <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = function(chain_id) {
    +    # silly but demonstrates optional use of chain_id
    +    list(theta = 1 / (chain_id + 1))
    +  }
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_mcmc_w_init_fun_2$init()
    +#> [[1]]
    +#> [[1]]$theta
    +#> [1] 0.5
    +#> 
    +#> 
    +#> [[2]]
    +#> [[2]]$theta
    +#> [1] 0.3333333
    +#> 
    +#> 
    +
    +# Specifying initial values as a list of lists
    +fit_mcmc_w_init_list <- mod$sample(
    +  data = stan_data,
    +  seed = 123,
    +  chains = 2,
    +  refresh = 0,
    +  init = list(
    +    list(theta = 0.75), # chain 1
    +    list(theta = 0.25)  # chain 2
    +  )
    +)
    +#> Running MCMC with 2 sequential chains...
    +#> 
    +#> Chain 1 finished in 0.0 seconds.
    +#> Chain 2 finished in 0.0 seconds.
    +#> 
    +#> Both chains finished successfully.
    +#> Mean chain execution time: 0.0 seconds.
    +#> Total execution time: 0.3 seconds.
    +#> 
    +fit_optim_w_init_list <- mod$optimize(
    +  data = stan_data,
    +  seed = 123,
    +  init = list(
    +    list(theta = 0.75)
    +  )
    +)
    +#> Initial log joint probability = -11.6657 
    +#>     Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes  
    +#>        6      -5.00402   0.000237915   9.55309e-07           1           1        9    
    +#> Optimization terminated normally:  
    +#>   Convergence detected: relative gradient magnitude is below tolerance 
    +#> Finished in  0.1 seconds.
    +fit_optim_w_init_list$init()
    +#> [[1]]
    +#> [[1]]$theta
    +#> [1] 0.75
    +#> 
    +#> 
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/read_cmdstan_csv.html b/docs/reference/read_cmdstan_csv.html index ea17d8efc..2106577ef 100644 --- a/docs/reference/read_cmdstan_csv.html +++ b/docs/reference/read_cmdstan_csv.html @@ -1,80 +1,17 @@ - - - - - - - -Read CmdStan CSV files into R — read_cmdstan_csv • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Read CmdStan CSV files into R — read_cmdstan_csv • cmdstanr - - - - - - - - - - - + + - - - -
    -
    - -
    - -
    +

    read_cmdstan_csv() is used internally by CmdStanR to read CmdStan's output CSV files into R. It can also be used by CmdStan users as -a more flexible and efficient alternative to rstan::read_stan_csv(). See +a more flexible and efficient alternative to rstan::read_stan_csv(). See the Value section for details on the structure of the returned list.

    It is also possible to create CmdStanR's fitted model objects directly from CmdStan CSV files using the as_cmdstan_fit() function.

    -
    read_cmdstan_csv(
    -  files,
    -  variables = NULL,
    -  sampler_diagnostics = NULL,
    -  format = getOption("cmdstanr_draws_format", NULL)
    -)
    +    
    +
    read_cmdstan_csv(
    +  files,
    +  variables = NULL,
    +  sampler_diagnostics = NULL,
    +  format = getOption("cmdstanr_draws_format", NULL)
    +)
    +
    +as_cmdstan_fit(
    +  files,
    +  check_diagnostics = TRUE,
    +  format = getOption("cmdstanr_draws_format")
    +)
    +
    + +
    +

    Arguments

    +
    files
    +

    (character vector) The paths to the CmdStan CSV files. These can +be files generated by running CmdStanR or running CmdStan directly.

    -as_cmdstan_fit( - files, - check_diagnostics = TRUE, - format = getOption("cmdstanr_draws_format") -)
    -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    files

    (character vector) The paths to the CmdStan CSV files. These can -be files generated by running CmdStanR or running CmdStan directly.

    variables

    (character vector) Optionally, the names of the variables -(parameters, transformed parameters, and generated quantities) to read in.

      -
    • If NULL (the default) then all variables are included.

    • +
      variables
      +

      (character vector) Optionally, the names of the variables +(parameters, transformed parameters, and generated quantities) to read in.

      • If NULL (the default) then all variables are included.

      • If an empty string (variables="") then none are included.

      • -
      • For non-scalar variables all elements or specific elements can be selected:

          -
        • variables = "theta" selects all elements of theta;

        • +
        • For non-scalar variables all elements or specific elements can be selected:

          • variables = "theta" selects all elements of theta;

          • variables = c("theta[1]", "theta[3]") selects only the 1st and 3rd elements.

        • -
    sampler_diagnostics

    (character vector) Works the same way as + + + +

    sampler_diagnostics
    +

    (character vector) Works the same way as variables but for sampler diagnostic variables (e.g., "treedepth__", -"accept_stat__", etc.). Ignored if the model was not fit using MCMC.

    format

    (string) The format for storing the draws or point estimates. +"accept_stat__", etc.). Ignored if the model was not fit using MCMC.

    + + +
    format
    +

    (string) The format for storing the draws or point estimates. The default depends on the method used to fit the model. See -draws for details, in particular the note about speed -and memory for models with many parameters.

    check_diagnostics

    (logical) For models fit using MCMC, should +draws for details, in particular the note about speed +and memory for models with many parameters.

    + + +
    check_diagnostics
    +

    (logical) For models fit using MCMC, should diagnostic checks be performed after reading in the files? The default is TRUE but set to FALSE to avoid checking for problems with divergences -and treedepth.

    +and treedepth.

    -

    Value

    +
    +
    +

    Value

    + -

    as_cmdstan_fit() returns a CmdStanMCMC, CmdStanMLE, or -CmdStanVB object. Some methods typically defined for those objects will not -work (e.g. save_data_file()) but the important methods like $summary(), +

    as_cmdstan_fit() returns a CmdStanMCMC, CmdStanMLE, or +CmdStanVB object. Some methods typically defined for those objects will not +work (e.g. save_data_file()) but the important methods like $summary(), $draws(), $sampler_diagnostics() and others will work fine.

    -

    read_cmdstan_csv() returns a named list with the following components:

      -
    • metadata: A list of the meta information from the run that produced the -CSV file(s). See Examples below.

    • -
    -

    The other components in the returned list depend on the method that produced + +

    read_cmdstan_csv() returns a named list with the following components:

    • metadata: A list of the meta information from the run that produced the +CSV file(s). See Examples below.

    • +

    The other components in the returned list depend on the method that produced the CSV file(s).

    -

    For sampling the returned list also includes the -following components:

      -
    • time: Run time information for the individual chains. The returned object -is the same as for the $time() method except the total run + + +

      For sampling the returned list also includes the +following components:

      • time: Run time information for the individual chains. The returned object +is the same as for the $time() method except the total run time can't be inferred from the CSV files (the chains may have been run in parallel) and is therefore NA.

      • inv_metric: A list (one element per chain) of inverse mass matrices or their diagonals, depending on the type of metric used.

      • step_size: A list (one element per chain) of the step sizes used.

      • warmup_draws: If save_warmup was TRUE when fitting the model then a -draws_array (or different format if format is +draws_array (or different format if format is specified) of warmup draws.

      • -
      • post_warmup_draws: A draws_array (or +

      • post_warmup_draws: A draws_array (or different format if format is specified) of post-warmup draws.

      • warmup_sampler_diagnostics: If save_warmup was TRUE when fitting the -model then a draws_array (or different format if +model then a draws_array (or different format if format is specified) of warmup draws of the sampler diagnostic variables.

      • post_warmup_sampler_diagnostics: A -draws_array (or different format if format is +draws_array (or different format if format is specified) of post-warmup draws of the sampler diagnostic variables.

      • -
      - -

      For optimization the returned list also includes the -following components:

        -
      • point_estimates: Point estimates for the model parameters.

      • -
      - -

      For variational inference the returned list also -includes the following components:

      For optimization the returned list also includes the +following components:

      • point_estimates: Point estimates for the model parameters.

      • +

      For variational inference the returned list also +includes the following components:

      • draws: A draws_matrix (or different format if format is specified) of draws from the approximate posterior distribution.

      • -
      - -

      For standalone generated quantities the -returned list also includes the following components:

      For standalone generated quantities the +returned list also includes the following components:

      • generated_quantities: A draws_array of the generated quantities.

      • -
      - - -

      Examples

      -
      # \dontrun{ -# Generate some CSV files to use for demonstration -fit1 <- cmdstanr_example("logistic", method = "sample", save_warmup = TRUE) -csv_files <- fit1$output_files() -print(csv_files) -
      #> [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181227-1-726257.csv" -#> [2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181227-2-726257.csv" -#> [3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181227-3-726257.csv" -#> [4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-202203181227-4-726257.csv"
      -# Creating fitting model objects - -# Create a CmdStanMCMC object from the CSV files -fit2 <- as_cmdstan_fit(csv_files) -fit2$print("beta") -
      #> variable mean median sd mad q5 q95 rhat ess_bulk ess_tail -#> beta[1] -0.67 -0.66 0.24 0.24 -1.08 -0.28 1.00 4045 3169 -#> beta[2] -0.27 -0.27 0.22 0.22 -0.64 0.08 1.00 3894 2721 -#> beta[3] 0.69 0.68 0.26 0.26 0.27 1.13 1.00 3754 2922
      -# Using read_cmdstan_csv -# -# Read in everything -x <- read_cmdstan_csv(csv_files) -str(x) -
      #> List of 8 -#> $ metadata :List of 40 -#> ..$ stan_version_major : num 2 -#> ..$ stan_version_minor : num 29 -#> ..$ stan_version_patch : num 1 -#> ..$ start_datetime : chr "2022-03-18 18:27:08 UTC" -#> ..$ method : chr "sample" -#> ..$ save_warmup : num 1 -#> ..$ thin : num 1 -#> ..$ gamma : num 0.05 -#> ..$ kappa : num 0.75 -#> ..$ t0 : num 10 -#> ..$ init_buffer : num 75 -#> ..$ term_buffer : num 50 -#> ..$ window : num 25 -#> ..$ algorithm : chr "hmc" -#> ..$ engine : chr "nuts" -#> ..$ metric : chr "diag_e" -#> ..$ stepsize_jitter : num 0 -#> ..$ num_chains : num 1 -#> ..$ id : num [1:4] 1 2 3 4 -#> ..$ init : num [1:4] 2 2 2 2 -#> ..$ seed : num 27467875 -#> ..$ refresh : num 100 -#> ..$ sig_figs : num -1 -#> ..$ profile_file : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpmzUYEz/logistic-profile-202203181227-1-051c26.csv" -#> ..$ stanc_version : chr "stanc3 v2.29.1" -#> ..$ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... -#> ..$ variables : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... -#> ..$ step_size_adaptation: num [1:4] 0.729 0.767 0.747 0.752 -#> ..$ model_name : chr "logistic_model" -#> ..$ adapt_engaged : num 1 -#> ..$ adapt_delta : num 0.8 -#> ..$ max_treedepth : num 10 -#> ..$ step_size : num [1:4] 1 1 1 1 -#> ..$ iter_warmup : num 1000 -#> ..$ iter_sampling : num 1000 -#> ..$ threads_per_chain : num 1 -#> ..$ time :'data.frame': 4 obs. of 4 variables: -#> .. ..$ chain_id: num [1:4] 1 2 3 4 -#> .. ..$ warmup : num [1:4] 0.093 0.087 0.149 0.092 -#> .. ..$ sampling: num [1:4] 0.081 0.096 0.1 0.087 -#> .. ..$ total : num [1:4] 0.174 0.183 0.249 0.179 -#> ..$ stan_variable_sizes :List of 4 -#> .. ..$ lp__ : num 1 -#> .. ..$ alpha : num 1 -#> .. ..$ beta : num 3 -#> .. ..$ log_lik: num 100 -#> ..$ stan_variables : chr [1:4] "lp__" "alpha" "beta" "log_lik" -#> ..$ model_params : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... 
-#> $ time :List of 2 -#> ..$ total : int NA -#> ..$ chains:'data.frame': 4 obs. of 4 variables: -#> .. ..$ chain_id: num [1:4] 1 2 3 4 -#> .. ..$ warmup : num [1:4] 0.093 0.087 0.149 0.092 -#> .. ..$ sampling: num [1:4] 0.081 0.096 0.1 0.087 -#> .. ..$ total : num [1:4] 0.174 0.183 0.249 0.179 -#> $ inv_metric :List of 4 -#> ..$ 1: num [1:4] 0.046 0.0637 0.0532 0.0736 -#> ..$ 2: num [1:4] 0.0421 0.0566 0.0523 0.0756 -#> ..$ 3: num [1:4] 0.0493 0.0528 0.0523 0.0753 -#> ..$ 4: num [1:4] 0.0365 0.0565 0.0397 0.0632 -#> $ step_size :List of 4 -#> ..$ 1: num 0.729 -#> ..$ 2: num 0.767 -#> ..$ 3: num 0.747 -#> ..$ 4: num 0.752 -#> $ warmup_draws : 'draws_array' num [1:1000, 1:4, 1:105] -66.8 -66.8 -66.8 -65.8 -66.3 ... -#> ..- attr(*, "dimnames")=List of 3 -#> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... -#> .. ..$ chain : chr [1:4] "1" "2" "3" "4" -#> .. ..$ variable : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... -#> $ post_warmup_draws : 'draws_array' num [1:1000, 1:4, 1:105] -65 -65.7 -64.4 -64.2 -65.3 ... -#> ..- attr(*, "dimnames")=List of 3 -#> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... -#> .. ..$ chain : chr [1:4] "1" "2" "3" "4" -#> .. ..$ variable : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ... -#> $ warmup_sampler_diagnostics : 'draws_array' num [1:1000, 1:4, 1:6] 1 0 0 0.941 0.933 ... -#> ..- attr(*, "dimnames")=List of 3 -#> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... -#> .. ..$ chain : chr [1:4] "1" "2" "3" "4" -#> .. ..$ variable : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ... -#> $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 1 0.916 0.998 0.972 0.911 ... -#> ..- attr(*, "dimnames")=List of 3 -#> .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ... -#> .. ..$ chain : chr [1:4] "1" "2" "3" "4" -#> .. ..$ variable : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ...
      -# Don't read in any of the sampler diagnostic variables -x <- read_cmdstan_csv(csv_files, sampler_diagnostics = "") - -# Don't read in any of the parameters or generated quantities -x <- read_cmdstan_csv(csv_files, variables = "") - -# Read in only specific parameters and sampler diagnostics -x <- read_cmdstan_csv( - csv_files, - variables = c("alpha", "beta[2]"), - sampler_diagnostics = c("n_leapfrog__", "accept_stat__") -) - -# For non-scalar parameters all elements can be selected or only some elements, -# e.g. all of the vector "beta" but only one element of the vector "log_lik" -x <- read_cmdstan_csv( - csv_files, - variables = c("beta", "log_lik[3]") -) -# } - -
      +
    + +
    +

    Examples

    +
    # \dontrun{
    +# Generate some CSV files to use for demonstration
    +fit1 <- cmdstanr_example("logistic", method = "sample", save_warmup = TRUE)
    +csv_files <- fit1$output_files()
    +print(csv_files)
    +#> [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251438-1-0afc76.csv"
    +#> [2] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251438-2-0afc76.csv"
    +#> [3] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251438-3-0afc76.csv"
    +#> [4] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-202307251438-4-0afc76.csv"
    +
    +# Creating fitted model objects
    +
    +# Create a CmdStanMCMC object from the CSV files
    +fit2 <- as_cmdstan_fit(csv_files)
    +fit2$print("beta")
    +#>  variable  mean median   sd  mad    q5   q95 rhat ess_bulk ess_tail
    +#>   beta[1] -0.67  -0.66 0.25 0.25 -1.09 -0.26 1.00     4467     2955
    +#>   beta[2] -0.27  -0.27 0.22 0.22 -0.63  0.09 1.00     4244     3209
    +#>   beta[3]  0.68   0.68 0.26 0.26  0.25  1.12 1.00     4340     3315
    +
    +# Using read_cmdstan_csv
    +#
    +# Read in everything
    +x <- read_cmdstan_csv(csv_files)
    +str(x)
    +#> List of 8
    +#>  $ metadata                       :List of 40
    +#>   ..$ stan_version_major  : num 2
    +#>   ..$ stan_version_minor  : num 32
    +#>   ..$ stan_version_patch  : num 2
    +#>   ..$ start_datetime      : chr "2023-07-25 20:38:31 UTC"
    +#>   ..$ method              : chr "sample"
    +#>   ..$ save_warmup         : num 1
    +#>   ..$ thin                : num 1
    +#>   ..$ gamma               : num 0.05
    +#>   ..$ kappa               : num 0.75
    +#>   ..$ t0                  : num 10
    +#>   ..$ init_buffer         : num 75
    +#>   ..$ term_buffer         : num 50
    +#>   ..$ window              : num 25
    +#>   ..$ algorithm           : chr "hmc"
    +#>   ..$ engine              : chr "nuts"
    +#>   ..$ metric              : chr "diag_e"
    +#>   ..$ stepsize_jitter     : num 0
    +#>   ..$ num_chains          : num 1
    +#>   ..$ id                  : num [1:4] 1 2 3 4
    +#>   ..$ init                : num [1:4] 2 2 2 2
    +#>   ..$ seed                : num 1.15e+09
    +#>   ..$ refresh             : num 100
    +#>   ..$ sig_figs            : num -1
    +#>   ..$ profile_file        : chr "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/logistic-profile-202307251438-1-5569d9.csv"
    +#>   ..$ stanc_version       : chr "stanc3 v2.32.2"
    +#>   ..$ sampler_diagnostics : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ...
    +#>   ..$ variables           : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +#>   ..$ step_size_adaptation: num [1:4] 0.666 0.702 0.658 0.727
    +#>   ..$ model_name          : chr "logistic_model"
    +#>   ..$ adapt_engaged       : num 1
    +#>   ..$ adapt_delta         : num 0.8
    +#>   ..$ max_treedepth       : num 10
    +#>   ..$ step_size           : num [1:4] 1 1 1 1
    +#>   ..$ iter_warmup         : num 1000
    +#>   ..$ iter_sampling       : num 1000
    +#>   ..$ threads_per_chain   : num 1
    +#>   ..$ time                :'data.frame':	4 obs. of  4 variables:
    +#>   .. ..$ chain_id: num [1:4] 1 2 3 4
    +#>   .. ..$ warmup  : num [1:4] 0.07 0.081 0.077 0.071
    +#>   .. ..$ sampling: num [1:4] 0.068 0.075 0.065 0.062
    +#>   .. ..$ total   : num [1:4] 0.138 0.156 0.142 0.133
    +#>   ..$ stan_variable_sizes :List of 4
    +#>   .. ..$ lp__   : num 1
    +#>   .. ..$ alpha  : num 1
    +#>   .. ..$ beta   : num 3
    +#>   .. ..$ log_lik: num 100
    +#>   ..$ stan_variables      : chr [1:4] "lp__" "alpha" "beta" "log_lik"
    +#>   ..$ model_params        : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +#>  $ time                           :List of 2
    +#>   ..$ total : int NA
    +#>   ..$ chains:'data.frame':	4 obs. of  4 variables:
    +#>   .. ..$ chain_id: num [1:4] 1 2 3 4
    +#>   .. ..$ warmup  : num [1:4] 0.07 0.081 0.077 0.071
    +#>   .. ..$ sampling: num [1:4] 0.068 0.075 0.065 0.062
    +#>   .. ..$ total   : num [1:4] 0.138 0.156 0.142 0.133
    +#>  $ inv_metric                     :List of 4
    +#>   ..$ 1: num [1:4] 0.0426 0.0631 0.0525 0.0739
    +#>   ..$ 2: num [1:4] 0.0483 0.0658 0.0426 0.065
    +#>   ..$ 3: num [1:4] 0.0473 0.0591 0.0532 0.0814
    +#>   ..$ 4: num [1:4] 0.0485 0.0583 0.05 0.0707
    +#>  $ step_size                      :List of 4
    +#>   ..$ 1: num 0.666
    +#>   ..$ 2: num 0.702
    +#>   ..$ 3: num 0.658
    +#>   ..$ 4: num 0.727
    +#>  $ warmup_draws                   : 'draws_array' num [1:1000, 1:4, 1:105] -105.5 -105.5 -105.5 -70.7 -66.2 ...
    +#>   ..- attr(*, "dimnames")=List of 3
    +#>   .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
    +#>   .. ..$ chain    : chr [1:4] "1" "2" "3" "4"
    +#>   .. ..$ variable : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +#>  $ post_warmup_draws              : 'draws_array' num [1:1000, 1:4, 1:105] -65.2 -64.4 -65.4 -64.4 -70.3 ...
    +#>   ..- attr(*, "dimnames")=List of 3
    +#>   .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
    +#>   .. ..$ chain    : chr [1:4] "1" "2" "3" "4"
    +#>   .. ..$ variable : chr [1:105] "lp__" "alpha" "beta[1]" "beta[2]" ...
    +#>  $ warmup_sampler_diagnostics     : 'draws_array' num [1:1000, 1:4, 1:6] 6.68e-01 0.00 1.27e-204 9.99e-01 9.89e-01 ...
    +#>   ..- attr(*, "dimnames")=List of 3
    +#>   .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
    +#>   .. ..$ chain    : chr [1:4] "1" "2" "3" "4"
    +#>   .. ..$ variable : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ...
    +#>  $ post_warmup_sampler_diagnostics: 'draws_array' num [1:1000, 1:4, 1:6] 0.997 0.935 0.852 0.98 0.644 ...
    +#>   ..- attr(*, "dimnames")=List of 3
    +#>   .. ..$ iteration: chr [1:1000] "1" "2" "3" "4" ...
    +#>   .. ..$ chain    : chr [1:4] "1" "2" "3" "4"
    +#>   .. ..$ variable : chr [1:6] "accept_stat__" "stepsize__" "treedepth__" "n_leapfrog__" ...
    +
    +# Don't read in any of the sampler diagnostic variables
    +x <- read_cmdstan_csv(csv_files, sampler_diagnostics = "")
    +
    +# Don't read in any of the parameters or generated quantities
    +x <- read_cmdstan_csv(csv_files, variables = "")
    +
    +# Read in only specific parameters and sampler diagnostics
    +x <- read_cmdstan_csv(
    +  csv_files,
    +  variables = c("alpha", "beta[2]"),
    +  sampler_diagnostics = c("n_leapfrog__", "accept_stat__")
    +)
    +
    +# For non-scalar parameters all elements can be selected or only some elements,
    +# e.g. all of the vector "beta" but only one element of the vector "log_lik"
    +x <- read_cmdstan_csv(
    +  csv_files,
    +  variables = c("beta", "log_lik[3]")
    +)
    +# }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/read_sample_csv.html b/docs/reference/read_sample_csv.html index e06b05c61..af34b60aa 100644 --- a/docs/reference/read_sample_csv.html +++ b/docs/reference/read_sample_csv.html @@ -1,75 +1,12 @@ - - - - - - - -Read CmdStan CSV files from sampling into R — read_sample_csv • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Read CmdStan CSV files from sampling into R — read_sample_csv • cmdstanr - - - - + + -
    -
    - -
    - -
    +
    -

    Deprecated. Use read_cmdstan_csv() instead.

    +

    Deprecated. Use read_cmdstan_csv() instead.

    -
    read_sample_csv(files, variables = NULL, sampler_diagnostics = NULL)
    +
    +
    read_sample_csv(files, variables = NULL, sampler_diagnostics = NULL)
    +
    -

    Arguments

    - - - - - - -
    files, variables, sampler_diagnostics

    Deprecated. Use -read_cmdstan_csv() instead.

    +
    +

    Arguments

    +
    files, variables, sampler_diagnostics
    +

    Deprecated. Use +read_cmdstan_csv() instead.

    +
    +
    -
    - - + + diff --git a/docs/reference/register_knitr_engine.html b/docs/reference/register_knitr_engine.html index 10533e5c4..1e4517702 100644 --- a/docs/reference/register_knitr_engine.html +++ b/docs/reference/register_knitr_engine.html @@ -1,78 +1,15 @@ - - - - - - - -Register CmdStanR's knitr engine for Stan — register_knitr_engine • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Register CmdStanR's knitr engine for Stan — register_knitr_engine • cmdstanr - - - - - - - - - - - - - - + + -
    -
    - -
    - -
    +
    -

    Registers CmdStanR's knitr engine eng_cmdstan() for processing Stan chunks. +

    Registers CmdStanR's knitr engine eng_cmdstan() for processing Stan chunks. Refer to the vignette -R Markdown CmdStan Engine +R Markdown CmdStan Engine for a demonstration.

    -
    register_knitr_engine(override = TRUE)
    - -

    Arguments

    - - - - - - -
    override

    (logical) Override knitr's built-in, RStan-based engine for -Stan? The default is TRUE. See Details.

    +
    +
    register_knitr_engine(override = TRUE)
    +
    -

    Details

    +
    +

    Arguments

    +
    override
    +

    (logical) Override knitr's built-in, RStan-based engine for +Stan? The default is TRUE. See Details.

    +
    +
    +

    Details

    If override = TRUE (default), this registers CmdStanR's knitr engine as the engine for stan chunks, replacing knitr's built-in, RStan-based engine. If override = FALSE, this registers a cmdstan engine so that both engines @@ -206,10 +133,10 @@

    Details highlighting for the Stan language, the cmdstan chunks will have stan syntax highlighting applied to them.

    See the vignette -R Markdown CmdStan Engine +R Markdown CmdStan Engine for an example.

    Note: When running chunks interactively in RStudio (e.g. when using -R Notebooks), it has +R Notebooks), it has been observed that the built-in, RStan-based engine is used for stan chunks even when CmdStanR's engine has been registered in the session. When the R Markdown document is knit/rendered, the correct engine is used. As a @@ -218,40 +145,35 @@

    Details

    If you would like to keep stan chunks as stan chunks, it is possible to specify engine = "cmdstan" in the chunk options after registering the cmdstan engine with override = FALSE.

    -

    References

    - +
    +
    +
    -
    - - + + diff --git a/docs/reference/set_cmdstan_path.html b/docs/reference/set_cmdstan_path.html index 891803ca4..a91b01e82 100644 --- a/docs/reference/set_cmdstan_path.html +++ b/docs/reference/set_cmdstan_path.html @@ -1,79 +1,16 @@ - - - - - - - -Get or set the file path to the CmdStan installation — set_cmdstan_path • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Get or set the file path to the CmdStan installation — set_cmdstan_path • cmdstanr - - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +
    @@ -187,80 +115,82 @@

    Get or set the file path to the CmdStan installation

    for how to avoid manually setting the path in each R session.

    -
    set_cmdstan_path(path = NULL)
    +    
    +
    set_cmdstan_path(path = NULL)
    +
    +cmdstan_path()
    +
    +cmdstan_version(error_on_NA = TRUE)
    +
    -cmdstan_path() +
    +

    Arguments

    +
    path
    +

    (string) The full file path to the CmdStan installation. If +NULL (the default) then the path is set to the default path used by +install_cmdstan() if it exists.

    -cmdstan_version(error_on_NA = TRUE)
    -

    Arguments

    - - - - - - - - - - -
    path

    (string) The full file path to the CmdStan installation. If -NULL (the default) then the path is set to the default path used by -install_cmdstan() if it exists.

    error_on_NA

    (logical) Should an error be thrown if CmdStan is not +

    error_on_NA
    +

    (logical) Should an error be thrown if CmdStan is not found. The default is TRUE. If FALSE, cmdstan_version() returns -NULL.

    +NULL.

    -

    Value

    +
    +
    +

    Value

    + -

    A string. Either the file path to the CmdStan installation or the +

    A string. Either the file path to the CmdStan installation or the CmdStan version number.

    + +

    CmdStan version string if available. If CmdStan is not found and error_on_NA is FALSE, cmdstan_version() returns NULL.

    -

    Details

    - +
    +
    +

    Details

    Before the package can be used it needs to know where the CmdStan installation is located. When the package is loaded it tries to help automate -this to avoid having to manually set the path every session:

      -
    • If the environment variable "CMDSTAN" exists at load time +this to avoid having to manually set the path every session:

      • If the environment variable "CMDSTAN" exists at load time then its value will be automatically set as the default path to CmdStan for -the R session.

      • +the R session. If the environment variable "CMDSTAN" is set, but a valid +CmdStan is not found in the supplied path, the path is treated as a top +folder that contains CmdStan installations. In that case, the CmdStan +installation with the largest version number will be set as the path to +CmdStan for the R session.

      • If no environment variable is found when loaded but any directory in the form ".cmdstan/cmdstan-[version]" (e.g., ".cmdstan/cmdstan-2.23.0"), -exists in the user's home directory (Sys.getenv("HOME"), not the current +exists in the user's home directory (Sys.getenv("HOME"), not the current working directory) then the path to the cmdstan with the largest version number will be set as the path to CmdStan for the R session. This is the -same as the default directory that install_cmdstan() would use to install +same as the default directory that install_cmdstan() would use to install the latest version of CmdStan.

      • -
      - -

      It is always possible to change the path after loading the package using +

    It is always possible to change the path after loading the package using set_cmdstan_path(path).

    +
    +
    - - - + + diff --git a/docs/reference/stan_threads.html b/docs/reference/stan_threads.html index 633442c4a..eeeb82fae 100644 --- a/docs/reference/stan_threads.html +++ b/docs/reference/stan_threads.html @@ -1,75 +1,12 @@ - - - - - - - -Set or get the number of threads used to execute Stan models — stan_threads • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Set or get the number of threads used to execute Stan models — stan_threads • cmdstanr - + + - - - -
    -
    - -
    - -
    +
    @@ -179,48 +107,46 @@

    Set or get the number of threads used to execute Stan models

    DEPRECATED. Please use the threads_per_chain argument when fitting the model.

    -
    num_threads()
    -
    -set_num_threads(num_threads)
    +
    +
    num_threads()
    +
    +set_num_threads(num_threads)
    +
    -

    Arguments

    - - - - - - -
    num_threads

    (positive integer) The number of threads to set.

    +
    +

    Arguments

    +
    num_threads
    +

    (positive integer) The number of threads to set.

    -

    Value

    +
    +
    +

    Value

    + -

    The value of the environment variable STAN_NUM_THREADS.

    +

    The value of the environment variable STAN_NUM_THREADS.

    +
    +
    - - - + + diff --git a/docs/reference/write_stan_file.html b/docs/reference/write_stan_file.html index dae2b825b..9b772e61c 100644 --- a/docs/reference/write_stan_file.html +++ b/docs/reference/write_stan_file.html @@ -1,82 +1,19 @@ - - - - - - - -Write Stan code to a file — write_stan_file • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Write Stan code to a file — write_stan_file • cmdstanr - - - - - - - - - - + + - - - - -
    -
    - -
    - -
    +

    Convenience function for writing Stan code to a (possibly -temporary) file with a .stan extension. By default, the -file name is chosen deterministically based on a hash +temporary) file with a .stan extension. By default, the +file name is chosen deterministically based on a hash of the Stan code, and the file is not overwritten if it already has correct contents. This means that calling this function multiple times with the same Stan code will reuse the compiled model. This also however means that the @@ -193,121 +121,125 @@

    Write Stan code to a file

    should ensure thread-safety in the rare cases when it is needed.

    -
    write_stan_file(
    -  code,
    -  dir = getOption("cmdstanr_write_stan_file_dir", tempdir()),
    -  basename = NULL,
    -  force_overwrite = FALSE,
    -  hash_salt = ""
    -)
    +
    +
    write_stan_file(
    +  code,
    +  dir = getOption("cmdstanr_write_stan_file_dir", tempdir()),
    +  basename = NULL,
    +  force_overwrite = FALSE,
    +  hash_salt = ""
    +)
    +
    -

    Arguments

    - - - - - - - - - - - - - - - - - - - - - - -
    code

    (character vector) The Stan code to write to the file. This can +

    +

    Arguments

    +
    code
    +

    (character vector) The Stan code to write to the file. This can be a character vector of length one (a string) containing the entire Stan program or a character vector with each element containing one line of the -Stan program.

    dir

    (string) An optional path to the directory where the file will be +Stan program.

    + + +
    dir
    +

    (string) An optional path to the directory where the file will be written. If omitted, a global option cmdstanr_write_stan_file_dir is -used. If the global options is not set, temporary directory -is used.

    basename

    (string) If dir is specified, optionally the basename to +used. If the global option is not set, a temporary directory +is used.

    + + +
    basename
    +

    (string) If dir is specified, optionally the basename to use for the file created. If not specified a file name is generated -from hashing the code.

    force_overwrite

    (logical) If set to TRUE the file will always be -overwritten and thus the resulting model will always be recompiled.

    hash_salt

    (string) Text to add to the model code prior to hashing to -determine the file name if basename is not set.

    +from hashing the code.

    + -

    Value

    +
    force_overwrite
    +

    (logical) If set to TRUE the file will always be +overwritten and thus the resulting model will always be recompiled.

    -

    The path to the file.

    -

    Examples

    -
    # stan program as a single string -stan_program <- " -data { - int<lower=0> N; - int<lower=0,upper=1> y[N]; -} -parameters { - real<lower=0,upper=1> theta; -} -model { - y ~ bernoulli(theta); -} -" +
    hash_salt
    +

    (string) Text to add to the model code prior to hashing to +determine the file name if basename is not set.

    -f <- write_stan_file(stan_program) -print(f) -
    #> [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T//RtmpmzUYEz/model_08f9e456ca04f3d3244db00f16ea5748.stan"
    -lines <- readLines(f) -print(lines) -
    #> [1] "" "data {" -#> [3] " int<lower=0> N;" " int<lower=0,upper=1> y[N];" -#> [5] "}" "parameters {" -#> [7] " real<lower=0,upper=1> theta;" "}" -#> [9] "model {" " y ~ bernoulli(theta);" -#> [11] "}" ""
    cat(lines, sep = "\n") -
    #> -#> data { -#> int<lower=0> N; -#> int<lower=0,upper=1> y[N]; -#> } -#> parameters { -#> real<lower=0,upper=1> theta; -#> } -#> model { -#> y ~ bernoulli(theta); -#> } -#>
    -# stan program as character vector of lines -f2 <- write_stan_file(lines) -identical(readLines(f), readLines(f2)) -
    #> [1] TRUE
    -
    +
    +
    +

    Value

    + + +

    The path to the file.

    +
    + +
    +

    Examples

    +
    # stan program as a single string
    +stan_program <- "
    +data {
    +  int<lower=0> N;
    +  int<lower=0,upper=1> y[N];
    +}
    +parameters {
    +  real<lower=0,upper=1> theta;
    +}
    +model {
    +  y ~ bernoulli(theta);
    +}
    +"
    +
    +f <- write_stan_file(stan_program)
    +print(f)
    +#> [1] "/var/folders/s0/zfzm55px2nd2v__zlw5xfj2h0000gn/T/RtmpFBtN6X/model_08f9e456ca04f3d3244db00f16ea5748.stan"
    +
    +lines <- readLines(f)
    +print(lines)
    +#>  [1] ""                               "data {"                        
    +#>  [3] "  int<lower=0> N;"              "  int<lower=0,upper=1> y[N];"  
    +#>  [5] "}"                              "parameters {"                  
    +#>  [7] "  real<lower=0,upper=1> theta;" "}"                             
    +#>  [9] "model {"                        "  y ~ bernoulli(theta);"       
    +#> [11] "}"                              ""                              
    +cat(lines, sep = "\n")
    +#> 
    +#> data {
    +#>   int<lower=0> N;
    +#>   int<lower=0,upper=1> y[N];
    +#> }
    +#> parameters {
    +#>   real<lower=0,upper=1> theta;
    +#> }
    +#> model {
    +#>   y ~ bernoulli(theta);
    +#> }
    +#> 
    +
    +# stan program as character vector of lines
    +f2 <- write_stan_file(lines)
    +identical(readLines(f), readLines(f2))
    +#> [1] TRUE
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/write_stan_json.html b/docs/reference/write_stan_json.html index 653081d79..a241a6d9f 100644 --- a/docs/reference/write_stan_json.html +++ b/docs/reference/write_stan_json.html @@ -1,75 +1,12 @@ - - - - - - - -Write data to a JSON file readable by CmdStan — write_stan_json • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Write data to a JSON file readable by CmdStan — write_stan_json • cmdstanr - - + + - - -
    -
    - -
    - -
    +
    @@ -179,120 +107,114 @@

    Write data to a JSON file readable by CmdStan

    Write data to a JSON file readable by CmdStan

    -
    write_stan_json(data, file, always_decimal = FALSE)
    +
    +
    write_stan_json(data, file, always_decimal = FALSE)
    +
    + +
    +

    Arguments

    +
    data
    +

    (list) A named list of R objects.

    + + +
    file
    +

    (string) The path to where the data file should be written.

    + -

    Arguments

    - - - - - - - - - - - - - - -
    data

    (list) A named list of R objects.

    file

    (string) The path to where the data file should be written.

    always_decimal

    (logical) Force generate non-integers with decimal +

    always_decimal
    +

    (logical) Force generate non-integers with decimal points to better distinguish between integers and floating point values. If TRUE all R objects in data intended for integers must be of integer -type.

    - -

    Details

    +type.

    +
    +
    +

    Details

    write_stan_json() performs several conversions before writing the JSON -file:

      -
    • logical -> integer (TRUE -> 1, FALSE -> 0)

    • -
    • data.frame -> matrix (via data.matrix())

    • +file:

      • logical -> integer (TRUE -> 1, FALSE -> 0)

      • +
      • data.frame -> matrix (via data.matrix())

      • list -> array

      • table -> vector, matrix, or array (depending on dimensions of table)

      • -
      - -

      The list to array conversion is intended to make it easier to prepare -the data for certain Stan declarations involving arrays:

        -
      • vector[J] v[K] (or equivalently array[K] vector[J] v as of Stan 2.27) +

      The list to array conversion is intended to make it easier to prepare +the data for certain Stan declarations involving arrays:

      • vector[J] v[K] (or equivalently array[K] vector[J] v as of Stan 2.27) can be constructed in R as a list with K elements where each element is a vector of length J

      • matrix[I,J] v[K] (or equivalently array[K] matrix[I,J] v as of Stan 2.27) can be constructed in R as a list with K elements where each element is an IxJ matrix

      • -
      - -

      These can also be passed in from R as arrays instead of lists but the list +

    These can also be passed in from R as arrays instead of lists but the list option is provided for convenience. Unfortunately for arrays with more than one dimension, e.g., vector[J] v[K,L] (or equivalently array[K,L] vector[J] v as of Stan 2.27) it is not possible to use an R list and an array must be used instead. For this example the array in R should have dimensions KxLxJ.

    +
    -

    Examples

    -
    x <- matrix(rnorm(10), 5, 2) -y <- rpois(nrow(x), lambda = 10) -z <- c(TRUE, FALSE) -data <- list(N = nrow(x), K = ncol(x), x = x, y = y, z = z) - -# write data to json file -file <- tempfile(fileext = ".json") -write_stan_json(data, file) - -# check the contents of the file -cat(readLines(file), sep = "\n") -
    #> { -#> "N": 5, -#> "K": 2, -#> "x": [ -#> [0.552235993548325, 0.721685253131368], -#> [1.40374671287315, -0.278162254251497], -#> [0.512051509623765, -0.524699374531002], -#> [0.660479109910809, 0.0625388737657775], -#> [-0.519175154088263, -0.450598871694098] -#> ], -#> "y": [16, 11, 7, 7, 6], -#> "z": [1, 0] -#> }
    - -# demonstrating list to array conversion -# suppose x is declared as `vector[3] x[2]` (or equivalently `array[2] vector[3] x`) -# we can use a list of length 2 where each element is a vector of length 3 -data <- list(x = list(1:3, 4:6)) -file <- tempfile(fileext = ".json") -write_stan_json(data, file) -cat(readLines(file), sep = "\n") -
    #> { -#> "x": [ -#> [1, 2, 3], -#> [4, 5, 6] -#> ] -#> }
    -
    +
    +

    Examples

    +
    x <- matrix(rnorm(10), 5, 2)
    +y <- rpois(nrow(x), lambda = 10)
    +z <- c(TRUE, FALSE)
    +data <- list(N = nrow(x), K = ncol(x), x = x, y = y, z = z)
    +
    +# write data to json file
    +file <- tempfile(fileext = ".json")
    +write_stan_json(data, file)
    +
    +# check the contents of the file
    +cat(readLines(file), sep = "\n")
    +#> {
    +#>   "N": 5,
    +#>   "K": 2,
    +#>   "x": [
    +#>     [1.39067866394794, 0.962441316104017],
    +#>     [-0.473566508639385, -0.892862153643971],
    +#>     [-2.10865772647623, 0.757598190570521],
    +#>     [-0.366087824635297, -0.120402278774405],
    +#>     [1.44942517823716, -0.444483690720062]
    +#>   ],
    +#>   "y": [5, 11, 1, 9, 11],
    +#>   "z": [1, 0]
    +#> }
    +
    +
    +# demonstrating list to array conversion
    +# suppose x is declared as `vector[3] x[2]` (or equivalently `array[2] vector[3] x`)
    +# we can use a list of length 2 where each element is a vector of length 3
    +data <- list(x = list(1:3, 4:6))
    +file <- tempfile(fileext = ".json")
    +write_stan_json(data, file)
    +cat(readLines(file), sep = "\n")
    +#> {
    +#>   "x": [
    +#>     [1, 2, 3],
    +#>     [4, 5, 6]
    +#>   ]
    +#> }
    +
    +
    +
    +
    - - - + + diff --git a/docs/reference/write_stan_tempfile.html b/docs/reference/write_stan_tempfile.html index fd650e821..4e9c4ac10 100644 --- a/docs/reference/write_stan_tempfile.html +++ b/docs/reference/write_stan_tempfile.html @@ -1,75 +1,12 @@ - - - - - - - -Write Stan code to a temporary file — write_stan_tempfile • cmdstanr - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Write Stan code to a temporary file — write_stan_tempfile • cmdstanr - + + - - - -
    -
    - -
    - -
    +
    -

    This function is deprecated. Please use write_stan_file() instead.

    +

    This function is deprecated. Please use write_stan_file() instead.

    -
    write_stan_tempfile(code, dir = tempdir())
    +
    +
    write_stan_tempfile(code, dir = tempdir())
    +
    -

    Arguments

    - - - - - - - - - - -
    code

    (character vector) The Stan code to write to the file. This can +

    +

    Arguments

    +
    code
    +

    (character vector) The Stan code to write to the file. This can be a character vector of length one (a string) containing the entire Stan program or a character vector with each element containing one line of the -Stan program.

    dir

    (string) An optional path to the directory where the file will be +Stan program.

    + + +
    dir
    +

    (string) An optional path to the directory where the file will be written. If omitted, a global option cmdstanr_write_stan_file_dir is -used. If the global options is not set, temporary directory -is used.

    +used. If the global option is not set, a temporary directory +is used.

    +
    +
    - - - + + diff --git a/man/CmdStanMCMC.Rd b/man/CmdStanMCMC.Rd index 98b7f4374..218ee49c3 100644 --- a/man/CmdStanMCMC.Rd +++ b/man/CmdStanMCMC.Rd @@ -55,6 +55,21 @@ methods, all of which have their own (linked) documentation pages. \code{\link[=fit-method-return_codes]{$return_codes()}} \tab Return the return codes from the CmdStan runs. \cr } +} + +\subsection{Expose Stan functions and additional methods to R}{\tabular{ll}{ + \strong{Method} \tab \strong{Description} \cr + \code{\link[=fit-method-expose_functions]{$expose_functions()}} \tab Expose Stan functions for use in R. \cr + \code{\link[=fit-method-init_model_methods]{$init_model_methods()}} \tab Expose methods for log-probability, gradients, parameter constraining and unconstraining. \cr + \code{\link[=fit-method-log_prob]{$log_prob()}} \tab Calculate log-prob. \cr + \code{\link[=fit-method-grad_log_prob]{$grad_log_prob()}} \tab Calculate log-prob and gradient. \cr + \code{\link[=fit-method-hessian]{$hessian()}} \tab Calculate log-prob, gradient, and hessian. \cr + \code{\link[=fit-method-constrain_variables]{$constrain_variables()}} \tab Transform a set of unconstrained parameter values to the constrained scale. \cr + \code{\link[=fit-method-unconstrain_variables]{$unconstrain_variables()}} \tab Transform a set of parameter values to the unconstrained scale. \cr + \code{\link[=fit-method-unconstrain_draws]{$unconstrain_draws()}} \tab Transform all parameter draws to the unconstrained scale. \cr + \code{\link[=fit-method-variable_skeleton]{$variable_skeleton()}} \tab Helper function to re-structure a vector of constrained parameter values. \cr +} + } } diff --git a/man/CmdStanMLE.Rd b/man/CmdStanMLE.Rd index c5044acc7..01acae4d9 100644 --- a/man/CmdStanMLE.Rd +++ b/man/CmdStanMLE.Rd @@ -45,6 +45,21 @@ all of which have their own (linked) documentation pages. \code{\link[=fit-method-return_codes]{$return_codes()}} \tab Return the return codes from the CmdStan runs. 
\cr } +} + +\subsection{Expose Stan functions and additional methods to R}{\tabular{ll}{ + \strong{Method} \tab \strong{Description} \cr + \code{\link[=fit-method-expose_functions]{$expose_functions()}} \tab Expose Stan functions for use in R. \cr + \code{\link[=fit-method-init_model_methods]{$init_model_methods()}} \tab Expose methods for log-probability, gradients, parameter constraining and unconstraining. \cr + \code{\link[=fit-method-log_prob]{$log_prob()}} \tab Calculate log-prob. \cr + \code{\link[=fit-method-grad_log_prob]{$grad_log_prob()}} \tab Calculate log-prob and gradient. \cr + \code{\link[=fit-method-hessian]{$hessian()}} \tab Calculate log-prob, gradient, and hessian. \cr + \code{\link[=fit-method-constrain_variables]{$constrain_variables()}} \tab Transform a set of unconstrained parameter values to the constrained scale. \cr + \code{\link[=fit-method-unconstrain_variables]{$unconstrain_variables()}} \tab Transform a set of parameter values to the unconstrained scale. \cr + \code{\link[=fit-method-unconstrain_draws]{$unconstrain_draws()}} \tab Transform all parameter draws to the unconstrained scale. \cr + \code{\link[=fit-method-variable_skeleton]{$variable_skeleton()}} \tab Helper function to re-structure a vector of constrained parameter values. \cr +} + } } diff --git a/man/CmdStanModel.Rd b/man/CmdStanModel.Rd index fbe191f82..94ec10d61 100644 --- a/man/CmdStanModel.Rd +++ b/man/CmdStanModel.Rd @@ -29,6 +29,7 @@ methods, many of which have their own (linked) documentation pages: \code{\link[=model-method-compile]{$exe_file()}} \tab Return the file path to the compiled executable. \cr \code{\link[=model-method-compile]{$hpp_file()}} \tab Return the file path to the \code{.hpp} file containing the generated C++ code. \cr \code{\link[=model-method-compile]{$save_hpp_file()}} \tab Save the \code{.hpp} file containing the generated C++ code. 
\cr + \code{\link[=model-method-expose_functions]{$expose_functions()}} \tab Expose Stan functions for use in R. \cr } } diff --git a/man/CmdStanVB.Rd b/man/CmdStanVB.Rd index b11a361f4..4b4d53ada 100644 --- a/man/CmdStanVB.Rd +++ b/man/CmdStanVB.Rd @@ -48,6 +48,21 @@ all of which have their own (linked) documentation pages. \code{\link[=fit-method-return_codes]{$return_codes()}} \tab Return the return codes from the CmdStan runs. \cr } +} + +\subsection{Expose Stan functions and additional methods to R}{\tabular{ll}{ + \strong{Method} \tab \strong{Description} \cr + \code{\link[=fit-method-expose_functions]{$expose_functions()}} \tab Expose Stan functions for use in R. \cr + \code{\link[=fit-method-init_model_methods]{$init_model_methods()}} \tab Expose methods for log-probability, gradients, parameter constraining and unconstraining. \cr + \code{\link[=fit-method-log_prob]{$log_prob()}} \tab Calculate log-prob. \cr + \code{\link[=fit-method-grad_log_prob]{$grad_log_prob()}} \tab Calculate log-prob and gradient. \cr + \code{\link[=fit-method-hessian]{$hessian()}} \tab Calculate log-prob, gradient, and hessian. \cr + \code{\link[=fit-method-constrain_variables]{$constrain_variables()}} \tab Transform a set of unconstrained parameter values to the constrained scale. \cr + \code{\link[=fit-method-unconstrain_variables]{$unconstrain_variables()}} \tab Transform a set of parameter values to the unconstrained scale. \cr + \code{\link[=fit-method-unconstrain_draws]{$unconstrain_draws()}} \tab Transform all parameter draws to the unconstrained scale. \cr + \code{\link[=fit-method-variable_skeleton]{$variable_skeleton()}} \tab Helper function to re-structure a vector of constrained parameter values. 
\cr +} + } } diff --git a/man/fit-method-constrain_variables.Rd b/man/fit-method-constrain_variables.Rd index 31cdbe9db..78c5b8098 100644 --- a/man/fit-method-constrain_variables.Rd +++ b/man/fit-method-constrain_variables.Rd @@ -12,13 +12,14 @@ constrain_variables( ) } \arguments{ -\item{unconstrained_variables}{(numeric) A vector of unconstrained parameters to constrain} +\item{unconstrained_variables}{(numeric) A vector of unconstrained parameters +to constrain.} -\item{transformed_parameters}{(boolean) Whether to return transformed parameters -implied by newly-constrained parameters (defaults to TRUE)} +\item{transformed_parameters}{(boolean) Whether to return transformed +parameters implied by newly-constrained parameters (defaults to TRUE).} \item{generated_quantities}{(boolean) Whether to return generated quantities -implied by newly-constrained parameters (defaults to TRUE)} +implied by newly-constrained parameters (defaults to TRUE).} } \description{ The \verb{$constrain_variables()} method transforms input parameters to @@ -28,7 +29,12 @@ the constrained scale \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample") fit_mcmc$init_model_methods() -fit_mcmc$constrain_variables(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +fit_mcmc$constrain_variables(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) } } +\seealso{ +\code{\link[=log_prob]{log_prob()}}, \code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} +} diff --git a/man/fit-method-grad_log_prob.Rd b/man/fit-method-grad_log_prob.Rd index e0a58d487..ef42d6c3d 100644 --- a/man/fit-method-grad_log_prob.Rd +++ b/man/fit-method-grad_log_prob.Rd @@ -9,11 +9,11 @@ given vector of unconstrained parameters} 
grad_log_prob(unconstrained_variables, jacobian_adjustment = TRUE) } \arguments{ -\item{unconstrained_variables}{(numeric) A vector of unconstrained parameters to be passed -to \code{grad_log_prob}} +\item{unconstrained_variables}{(numeric) A vector of unconstrained parameters +to be passed to \code{grad_log_prob}.} -\item{jacobian_adjustment}{(bool) Whether to include the log-density adjustments from -un/constraining variables} +\item{jacobian_adjustment}{(bool) Whether to include the log-density +adjustments from un/constraining variables.} } \description{ The \verb{$grad_log_prob()} method provides access to the @@ -23,7 +23,12 @@ Stan model's \code{log_prob} function and its derivative \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample") fit_mcmc$init_model_methods() -fit_mcmc$grad_log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +fit_mcmc$grad_log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) } } +\seealso{ +\code{\link[=log_prob]{log_prob()}}, \code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} +} diff --git a/man/fit-method-hessian.Rd b/man/fit-method-hessian.Rd index 3674e926b..fe384c9d9 100644 --- a/man/fit-method-hessian.Rd +++ b/man/fit-method-hessian.Rd @@ -9,11 +9,11 @@ for a given vector of unconstrained parameters} hessian(unconstrained_variables, jacobian_adjustment = TRUE) } \arguments{ -\item{unconstrained_variables}{(numeric) A vector of unconstrained parameters to be passed -to \code{hessian}} +\item{unconstrained_variables}{(numeric) A vector of unconstrained parameters +to be passed to \code{hessian}.} -\item{jacobian_adjustment}{(bool) Whether to include the log-density adjustments from -un/constraining variables} 
+\item{jacobian_adjustment}{(bool) Whether to include the log-density +adjustments from un/constraining variables.} } \description{ The \verb{$hessian()} method provides access to the @@ -21,9 +21,14 @@ Stan model's \code{log_prob}, its derivative, and its hessian } \examples{ \dontrun{ -fit_mcmc <- cmdstanr_example("logistic", method = "sample") -fit_mcmc$init_model_methods() -fit_mcmc$hessian(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +# fit_mcmc <- cmdstanr_example("logistic", method = "sample") +# fit_mcmc$init_model_methods(hessian = TRUE) +# fit_mcmc$hessian(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) } } +\seealso{ +\code{\link[=log_prob]{log_prob()}}, \code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} +} diff --git a/man/fit-method-init_model_methods.Rd b/man/fit-method-init_model_methods.Rd index 6561b0cde..165e804a6 100644 --- a/man/fit-method-init_model_methods.Rd +++ b/man/fit-method-init_model_methods.Rd @@ -4,7 +4,7 @@ \alias{fit-method-init_model_methods} \alias{init_model_methods} \title{Compile additional methods for accessing the model log-probability function -and parameter constraining and unconstraining. This requires the \code{Rcpp} package.} +and parameter constraining and unconstraining.} \usage{ init_model_methods(seed = 0, verbose = FALSE, hessian = FALSE) } @@ -16,13 +16,22 @@ init_model_methods(seed = 0, verbose = FALSE, hessian = FALSE) \item{hessian}{(boolean) Whether to expose the (experimental) hessian method.} } \description{ -The \verb{$init_model_methods()} compiles and initializes the -\code{log_prob}, \code{grad_log_prob}, \code{constrain_variables}, and \code{unconstrain_variables} functions. 
+The \verb{$init_model_methods()} method compiles and initializes the +\code{log_prob}, \code{grad_log_prob}, \code{constrain_variables}, \code{unconstrain_variables} +and \code{unconstrain_draws} functions. These are then available as methods of +the fitted model object. This requires the \code{Rcpp} package. + +Note: there may be many compiler warnings emitted during compilation but +these can be ignored so long as they are warnings and not errors. } \examples{ \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample") fit_mcmc$init_model_methods() } - +} +\seealso{ +\code{\link[=log_prob]{log_prob()}}, \code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} } diff --git a/man/fit-method-log_prob.Rd b/man/fit-method-log_prob.Rd index cbdbb93a0..c35e48431 100644 --- a/man/fit-method-log_prob.Rd +++ b/man/fit-method-log_prob.Rd @@ -20,7 +20,12 @@ The \verb{$log_prob()} method provides access to the Stan model's \code{log_prob \dontrun{ fit_mcmc <- cmdstanr_example("logistic", method = "sample") fit_mcmc$init_model_methods() -fit_mcmc$log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2, 1.1)) +fit_mcmc$log_prob(unconstrained_variables = c(0.5, 1.2, 1.1, 2.2)) } } +\seealso{ +\code{\link[=log_prob]{log_prob()}}, \code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} +} diff --git a/man/fit-method-unconstrain_draws.Rd b/man/fit-method-unconstrain_draws.Rd index 947e47fe2..6764ecb53 100644 --- a/man/fit-method-unconstrain_draws.Rd +++ 
b/man/fit-method-unconstrain_draws.Rd @@ -36,3 +36,8 @@ unconstrained_draws <- fit_mcmc$unconstrain_draws(draws = fit_mcmc$draws()) } } +\seealso{ +\code{\link[=log_prob]{log_prob()}}, \code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} +} diff --git a/man/fit-method-unconstrain_variables.Rd b/man/fit-method-unconstrain_variables.Rd index 472c1488f..69c6e308e 100644 --- a/man/fit-method-unconstrain_variables.Rd +++ b/man/fit-method-unconstrain_variables.Rd @@ -8,8 +8,8 @@ unconstrain_variables(variables) } \arguments{ -\item{variables}{(list) A list of parameter values to transform, in the same format as -provided to the \code{init} argument of the \verb{$sample()} method} +\item{variables}{(list) A list of parameter values to transform, in the same +format as provided to the \code{init} argument of the \verb{$sample()} method.} } \description{ The \verb{$unconstrain_variables()} method transforms input parameters to @@ -23,3 +23,8 @@ fit_mcmc$unconstrain_variables(list(alpha = 0.5, beta = c(0.7, 1.1, 0.2))) } } +\seealso{ +\code{\link[=log_prob]{log_prob()}}, \code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} +} diff --git a/man/fit-method-variable_skeleton.Rd b/man/fit-method-variable_skeleton.Rd index 859dab332..2116d0ad2 100644 --- a/man/fit-method-variable_skeleton.Rd +++ b/man/fit-method-variable_skeleton.Rd @@ -26,3 +26,8 @@ fit_mcmc$variable_skeleton() } } +\seealso{ +\code{\link[=log_prob]{log_prob()}}, 
\code{\link[=grad_log_prob]{grad_log_prob()}}, \code{\link[=constrain_variables]{constrain_variables()}}, +\code{\link[=unconstrain_variables]{unconstrain_variables()}}, \code{\link[=unconstrain_draws]{unconstrain_draws()}}, \code{\link[=variable_skeleton]{variable_skeleton()}}, +\code{\link[=hessian]{hessian()}} +} diff --git a/man/model-method-check_syntax.Rd b/man/model-method-check_syntax.Rd index 885061116..9193a684d 100644 --- a/man/model-method-check_syntax.Rd +++ b/man/model-method-check_syntax.Rd @@ -80,6 +80,7 @@ The Stan and CmdStan documentation: Other CmdStanModel methods: \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, diff --git a/man/model-method-compile.Rd b/man/model-method-compile.Rd index 9ff75f473..b4d7b35f8 100644 --- a/man/model-method-compile.Rd +++ b/man/model-method-compile.Rd @@ -60,12 +60,17 @@ not modified since last compiled. The default is \code{FALSE}. Can also be set via a global \code{cmdstanr_force_recompile} option.} \item{compile_model_methods}{(logical) Compile additional model methods -(\code{log_prob()}, \code{grad_log_prob()}, \code{constrain_pars()}, \code{unconstrain_pars()})} +(\code{log_prob()}, \code{grad_log_prob()}, \code{constrain_variables()}, +\code{unconstrain_variables()}).} \item{compile_hessian_method}{(logical) Should the (experimental) \code{hessian()} method be compiled with the model methods?} -\item{compile_standalone}{(logical) Should functions in the Stan model be compiled for used in R?} +\item{compile_standalone}{(logical) Should functions in the Stan model be +compiled for use in R? If \code{TRUE} the functions will be available via the +\code{functions} field in the compiled model object. 
This can also be done after +compilation using the +\code{\link[=model-method-expose_functions]{$expose_functions()}} method.} \item{threads}{Deprecated and will be removed in a future release. Please turn on threading via \code{cpp_options = list(stan_threads = TRUE)} instead.} @@ -142,6 +147,7 @@ The Stan and CmdStan documentation: Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, diff --git a/man/model-method-diagnose.Rd b/man/model-method-diagnose.Rd index 371b71619..7f9cde7d4 100644 --- a/man/model-method-diagnose.Rd +++ b/man/model-method-diagnose.Rd @@ -122,6 +122,7 @@ The Stan and CmdStan documentation: Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, diff --git a/man/model-method-expose_functions.Rd b/man/model-method-expose_functions.Rd new file mode 100644 index 000000000..c01aa3c1a --- /dev/null +++ b/man/model-method-expose_functions.Rd @@ -0,0 +1,83 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/model.R +\name{model-method-expose_functions} +\alias{model-method-expose_functions} +\alias{expose_functions} +\alias{fit-method-expose_functions} +\title{Expose Stan functions to R} +\usage{ +expose_functions(global = FALSE, verbose = FALSE) +} +\arguments{ +\item{global}{(logical) Should the functions be added to the Global +Environment? The default is \code{FALSE}, in which case the functions are +available via the \code{functions} field of the R6 object.} + +\item{verbose}{(logical) Should detailed information about generated code be +printed to the console? 
Defaults to \code{FALSE}.} +} +\description{ +The \verb{$expose_functions()} method of a \code{\link{CmdStanModel}} object +will compile the functions in the Stan program's \code{functions} block and +expose them for use in \R. This can also be specified via the +\code{compile_standalone} argument to the \code{\link[=model-method-compile]{$compile()}} +method. + +This method is also available for fitted model objects (\code{\link{CmdStanMCMC}}, \code{\link{CmdStanVB}}, etc.). +See \strong{Examples}. + +Note: there may be many compiler warnings emitted during compilation but +these can be ignored so long as they are warnings and not errors. +} +\examples{ +\dontrun{ +stan_file <- write_stan_file( + " + functions { + real a_plus_b(real a, real b) { + return a + b; + } + } + parameters { + real x; + } + model { + x ~ std_normal(); + } + " +) +mod <- cmdstan_model(stan_file) +mod$expose_functions() +mod$functions$a_plus_b(1, 2) + +fit <- mod$sample(refresh = 0) +fit$expose_functions() # already compiled because of above but this would compile them otherwise +fit$functions$a_plus_b(1, 2) +} + + +} +\seealso{ +The CmdStanR website +(\href{https://mc-stan.org/cmdstanr/}{mc-stan.org/cmdstanr}) for online +documentation and tutorials. 
+ +The Stan and CmdStan documentation: +\itemize{ +\item Stan documentation: \href{https://mc-stan.org/users/documentation/}{mc-stan.org/users/documentation} +\item CmdStan User’s Guide: \href{https://mc-stan.org/docs/cmdstan-guide/}{mc-stan.org/docs/cmdstan-guide} +} + +Other CmdStanModel methods: +\code{\link{model-method-check_syntax}}, +\code{\link{model-method-compile}}, +\code{\link{model-method-diagnose}}, +\code{\link{model-method-format}}, +\code{\link{model-method-generate-quantities}}, +\code{\link{model-method-optimize}}, +\code{\link{model-method-sample_mpi}}, +\code{\link{model-method-sample}}, +\code{\link{model-method-variables}}, +\code{\link{model-method-variational}} +} +\concept{CmdStanModel methods} diff --git a/man/model-method-format.Rd b/man/model-method-format.Rd index 777e7da01..1d7ed11bd 100644 --- a/man/model-method-format.Rd +++ b/man/model-method-format.Rd @@ -79,6 +79,7 @@ Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, \code{\link{model-method-sample_mpi}}, diff --git a/man/model-method-generate-quantities.Rd b/man/model-method-generate-quantities.Rd index da75ac5f3..5a80bb736 100644 --- a/man/model-method-generate-quantities.Rd +++ b/man/model-method-generate-quantities.Rd @@ -172,6 +172,7 @@ Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-optimize}}, \code{\link{model-method-sample_mpi}}, diff --git a/man/model-method-optimize.Rd b/man/model-method-optimize.Rd index a8735e7fe..42f81d07a 100644 --- a/man/model-method-optimize.Rd +++ b/man/model-method-optimize.Rd @@ -300,6 +300,7 @@ Other CmdStanModel methods: 
\code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-sample_mpi}}, diff --git a/man/model-method-sample.Rd b/man/model-method-sample.Rd index 2c6b693db..e0955abb4 100644 --- a/man/model-method-sample.Rd +++ b/man/model-method-sample.Rd @@ -412,6 +412,7 @@ Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, diff --git a/man/model-method-sample_mpi.Rd b/man/model-method-sample_mpi.Rd index a99b96952..89981273f 100644 --- a/man/model-method-sample_mpi.Rd +++ b/man/model-method-sample_mpi.Rd @@ -311,6 +311,7 @@ Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, diff --git a/man/model-method-variables.Rd b/man/model-method-variables.Rd index f2fb296f5..aa609ddda 100644 --- a/man/model-method-variables.Rd +++ b/man/model-method-variables.Rd @@ -40,6 +40,7 @@ Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, diff --git a/man/model-method-variational.Rd b/man/model-method-variational.Rd index a64a5a232..21c92fe24 100644 --- a/man/model-method-variational.Rd +++ b/man/model-method-variational.Rd @@ -304,6 
+304,7 @@ Other CmdStanModel methods: \code{\link{model-method-check_syntax}}, \code{\link{model-method-compile}}, \code{\link{model-method-diagnose}}, +\code{\link{model-method-expose_functions}}, \code{\link{model-method-format}}, \code{\link{model-method-generate-quantities}}, \code{\link{model-method-optimize}}, diff --git a/man/read_sample_csv.Rd b/man/read_sample_csv.Rd index 76bb5463c..4c7661ae7 100644 --- a/man/read_sample_csv.Rd +++ b/man/read_sample_csv.Rd @@ -13,3 +13,4 @@ read_sample_csv(files, variables = NULL, sampler_diagnostics = NULL) \description{ Deprecated. Use \code{\link[=read_cmdstan_csv]{read_cmdstan_csv()}} instead. } +\keyword{internal} diff --git a/man/stan_threads.Rd b/man/stan_threads.Rd index e79e882ff..e8cf8470d 100644 --- a/man/stan_threads.Rd +++ b/man/stan_threads.Rd @@ -19,3 +19,4 @@ The value of the environment variable \code{STAN_NUM_THREADS}. \description{ DEPRECATED. Please use the \code{threads_per_chain} argument when fitting the model. } +\keyword{internal} diff --git a/man/write_stan_tempfile.Rd b/man/write_stan_tempfile.Rd index be3073ffa..e663302fe 100644 --- a/man/write_stan_tempfile.Rd +++ b/man/write_stan_tempfile.Rd @@ -20,3 +20,4 @@ is used.} \description{ This function is deprecated. Please use \code{\link[=write_stan_file]{write_stan_file()}} instead. 
} +\keyword{internal} diff --git a/vignettes/articles-online-only/opencl-files/bernoulli_logit_glm.stan b/vignettes/articles-online-only/opencl-files/bernoulli_logit_glm.stan index 133521db4..3d5514364 100644 --- a/vignettes/articles-online-only/opencl-files/bernoulli_logit_glm.stan +++ b/vignettes/articles-online-only/opencl-files/bernoulli_logit_glm.stan @@ -2,7 +2,7 @@ data { int k; int n; matrix[n, k] X; - int y[n]; + array[n] int y; } parameters { vector[k] beta; diff --git a/vignettes/articles-online-only/opencl.Rmd b/vignettes/articles-online-only/opencl.Rmd index bdf35bf77..bc8599efd 100644 --- a/vignettes/articles-online-only/opencl.Rmd +++ b/vignettes/articles-online-only/opencl.Rmd @@ -13,7 +13,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r child="../children/settings-knitr.Rmd"} +```{r child="../children/_settings-knitr.Rmd"} ``` ## Introduction diff --git a/vignettes/children/settings-knitr.Rmd b/vignettes/children/_settings-knitr.Rmd similarity index 100% rename from vignettes/children/settings-knitr.Rmd rename to vignettes/children/_settings-knitr.Rmd diff --git a/vignettes/children/comparison-with-rstan.md b/vignettes/children/comparison-with-rstan.md index 65233450b..ed1628f1b 100644 --- a/vignettes/children/comparison-with-rstan.md +++ b/vignettes/children/comparison-with-rstan.md @@ -9,15 +9,11 @@ results to output files. ### Advantages of RStan -* Advanced features. We are working on making these available outside of RStan -but currently they are only available to R users via RStan: - - `rstan::log_prob()` - - `rstan::grad_log_prob()` - - `rstan::expose_stan_functions()` - * Allows other developers to distribute R packages with _pre-compiled_ Stan programs (like **rstanarm**) on CRAN. +* Avoids use of R6 classes, which may result in more familiar syntax for many R users. + ### Advantages of CmdStanR * Compatible with latest versions of Stan. 
Keeping up with Stan releases is @@ -26,8 +22,6 @@ package and new CRAN releases of both **rstan** and **StanHeaders**. With CmdStanR the latest improvements in Stan will be available from R immediately after updating CmdStan using `cmdstanr::install_cmdstan()`. -* Fewer installation issues (e.g., no need to mess with Makevars files). - * Running Stan via external processes results in fewer unexpected crashes, especially in RStudio. diff --git a/vignettes/cmdstanr-internals.Rmd b/vignettes/cmdstanr-internals.Rmd index 93c656f31..164b80085 100644 --- a/vignettes/cmdstanr-internals.Rmd +++ b/vignettes/cmdstanr-internals.Rmd @@ -13,7 +13,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r child="children/settings-knitr.Rmd"} +```{r child="children/_settings-knitr.Rmd"} ``` ## Introduction @@ -127,7 +127,7 @@ is missing a lower bound and a prior for a parameter. stan_file_pedantic <- write_stan_file(" data { int N; - int y[N]; + array[N] int y; } parameters { // should have but omitting to demonstrate pedantic mode @@ -461,7 +461,7 @@ the save file. rm(fit); gc() fit <- readRDS(temp_rds_file) -fit$summary() +fit$print() ``` ## Developing using CmdStanR diff --git a/vignettes/cmdstanr.Rmd b/vignettes/cmdstanr.Rmd index 9350ea541..dcd24d15a 100644 --- a/vignettes/cmdstanr.Rmd +++ b/vignettes/cmdstanr.Rmd @@ -13,16 +13,16 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r child="children/settings-knitr.Rmd"} +```{r child="children/_settings-knitr.Rmd"} ``` ## Introduction -CmdStanR is a lightweight interface to [Stan](https://mc-stan.org/) for R users -(see [CmdStanPy](https://github.com/stan-dev/cmdstanpy) for Python) that -provides an alternative to the traditional [RStan](https://mc-stan.org/rstan/) -interface. See the [*Comparison with RStan*](#comparison-with-rstan) section -later in this vignette for more details on how the two interfaces differ. 
+CmdStanR (Command Stan R) is a lightweight interface to +[Stan](https://mc-stan.org/) for R users that provides an alternative to the +traditional [RStan](https://mc-stan.org/rstan/) interface. See the [*Comparison +with RStan*](#comparison-with-rstan) section later in this vignette for more +details on how the two interfaces differ. **CmdStanR is not on CRAN yet**, but the beta release can be installed by running the following command in R. @@ -38,7 +38,6 @@ later in examples. ```{r library, message=FALSE} library(cmdstanr) -check_cmdstan_toolchain(fix = TRUE, quiet = TRUE) library(posterior) library(bayesplot) color_scheme_set("brightblue") @@ -188,7 +187,7 @@ first argument specifies the variables to summarize and any arguments after that are passed on to `posterior::summarise_draws()` to specify which summaries to compute, whether to use multiple cores, etc. -```{r summary} +```{r summary, eval=FALSE} fit$summary() fit$summary(variables = c("theta", "lp__"), "mean", "sd") @@ -203,6 +202,24 @@ fit$summary( ) ``` +```{r, echo=FALSE} +# NOTE: the hack of using print.data.frame in chunks with echo=FALSE +# is used because the pillar formatting of posterior draws_summary objects +# isn't playing nicely with pkgdown::build_articles(). +options(digits = 2) +print.data.frame(fit$summary()) + +print.data.frame(fit$summary(variables = c("theta", "lp__"), "mean", "sd")) + +print.data.frame(fit$summary("theta", pr_lt_half = ~ mean(. <= 0.5))) + +print.data.frame(fit$summary( + variables = NULL, + posterior::default_summary_measures(), + extra_quantiles = ~posterior::quantile2(., probs = c(.0275, .975)) +)) +``` + #### CmdStan's stansummary utility CmdStan itself provides a `stansummary` utility that can be called using the @@ -334,11 +351,20 @@ the `$sample()` method demonstrated above. We can find the (penalized) maximum likelihood estimate (MLE) using [`$optimize()`](https://mc-stan.org/cmdstanr/reference/model-method-optimize.html). 
-```{r optimize} +```{r optimize, eval=FALSE} fit_mle <- mod$optimize(data = data_list, seed = 123) fit_mle$summary() # includes lp__ (log prob calculated by Stan program) fit_mle$mle("theta") ``` +```{r, echo=FALSE} +# NOTE: the hack of using print.data.frame in chunks with echo=FALSE +# is used because the pillar formatting of posterior draws_summary objects +# isn't playing nicely with pkgdown::build_articles(). +options(digits = 2) +fit_mle <- mod$optimize(data = data_list, seed = 123) +print.data.frame(fit_mle$summary()) # includes lp__ (log prob calculated by Stan program) +fit_mle$mle("theta") +``` Here's a plot comparing the penalized MLE to the posterior distribution of `theta`. @@ -354,10 +380,18 @@ We can run Stan's experimental variational Bayes algorithm (ADVI) using the [`$variational()`](https://mc-stan.org/cmdstanr/reference/model-method-variational.html) method. -```{r variational} +```{r variational, eval=FALSE} fit_vb <- mod$variational(data = data_list, seed = 123, output_samples = 4000) fit_vb$summary("theta") ``` +```{r, echo=FALSE} +# NOTE: the hack of using print.data.frame in chunks with echo=FALSE +# is used because the pillar formatting of posterior draws_summary objects +# isn't playing nicely with pkgdown::build_articles(). +options(digits = 2) +fit_vb <- mod$variational(data = data_list, seed = 123, output_samples = 4000) +print.data.frame(fit_vb$summary("theta")) +``` The `$draws()` method can be used to access the approximate posterior draws. 
Let's extract the draws, make the same plot we made after MCMC, and compare the diff --git a/vignettes/deprecations.Rmd b/vignettes/deprecations.Rmd index 6fb1b6e8a..920aebed6 100644 --- a/vignettes/deprecations.Rmd +++ b/vignettes/deprecations.Rmd @@ -13,7 +13,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r child="children/settings-knitr.Rmd"} +```{r child="children/_settings-knitr.Rmd"} ``` ## Introduction diff --git a/vignettes/posterior.Rmd b/vignettes/posterior.Rmd index 3d246a6f1..465391d8b 100644 --- a/vignettes/posterior.Rmd +++ b/vignettes/posterior.Rmd @@ -12,17 +12,31 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r child="children/settings-knitr.Rmd"} +```{r child="children/_settings-knitr.Rmd"} +``` + + +```{r, include=FALSE} +options(digits=2) ``` ## Summary We can easily customise the summary statistics reported by `$summary()` and `$print()`. -```{r} +```{r eval=FALSE} fit <- cmdstanr::cmdstanr_example("schools", method = "sample") fit$summary() ``` +```{r echo=FALSE} +fit <- cmdstanr::cmdstanr_example("schools", method = "sample") +print.data.frame(fit$summary()) +``` By default all variables are summarised with the following functions: ```{r} @@ -30,23 +44,33 @@ posterior::default_summary_measures() ``` To change the variables summarised, we use the variables argument -```{r} +```{r eval=FALSE} fit$summary(variables = c("mu", "tau")) ``` +```{r echo=FALSE} +print.data.frame(fit$summary(variables = c("mu", "tau"))) +``` We can additionally change which functions are used -```{r} +```{r eval=FALSE} fit$summary(variables = c("mu", "tau"), mean, sd) ``` +```{r echo=FALSE} +print.data.frame(fit$summary(variables = c("mu", "tau"), mean, sd)) +``` To summarise all variables with non-default functions, it is necessary to explicitly set the variables argument, either to `NULL` or the full vector of variable names. 
-```{r} +```{r eval=FALSE} fit$metadata()$model_params fit$summary(variables = NULL, "mean", "median") ``` +```{r echo=FALSE} +fit$metadata()$model_params +print.data.frame(fit$summary(variables = NULL, "mean", "median")) +``` Summary functions can be specified by character string, function, or using a formula (or anything else supported by [rlang::as_function]). If these arguments are named, those names will be used in the tibble output. If the summary results are named they will take precedence. -```{r} +```{r eval=FALSE} my_sd <- function(x) c(My_SD = sd(x)) fit$summary( c("mu", "tau"), @@ -57,28 +81,57 @@ fit$summary( Minimum = function(x) min(x) ) ``` +```{r echo=FALSE} +my_sd <- function(x) c(My_SD = sd(x)) +print.data.frame(fit$summary( + c("mu", "tau"), + MEAN = mean, + "median", + my_sd, + ~quantile(.x, probs = c(0.1, 0.9)), + Minimum = function(x) min(x) +)) +``` + Arguments to all summary functions can also be specified with `.args`. -```{r} +```{r eval=FALSE} fit$summary(c("mu", "tau"), quantile, .args = list(probs = c(0.025, .05, .95, .975))) ``` +```{r echo=FALSE} +print.data.frame(fit$summary(c("mu", "tau"), quantile, .args = list(probs = c(0.025, .05, .95, .975)))) +``` The summary functions are applied to the array of sample values, with dimension `iter_sampling`x`chains`. -```{r} +```{r eval=FALSE} fit$summary(variables = NULL, dim, colMeans) ``` +```{r echo=FALSE} +print.data.frame(fit$summary(variables = NULL, dim, colMeans)) +``` -For this reason users may have unexpected results if they use [stats::var()] directly, as it will return a covariance matrix. An alternative is the [distributional::variance] function. -```{r} -fit$summary(c("mu", "tau"), distributional::variance, ~var(as.vector(.x))) + +For this reason users may have unexpected results if they use `stats::var()` directly, as it will return a covariance matrix. An alternative is the `distributional::variance()` function, +which can also be accessed via `posterior::variance()`. 
+```{r eval=FALSE} +fit$summary(c("mu", "tau"), posterior::variance, ~var(as.vector(.x))) ``` +```{r echo=FALSE} +print.data.frame(fit$summary(c("mu", "tau"), posterior::variance, ~var(as.vector(.x)))) +``` + Summary functions need not be numeric, but these won't work with `$print()`. -```{r} +```{r eval=FALSE} strict_pos <- function(x) if (all(x > 0)) "yes" else "no" fit$summary(variables = NULL, "Strictly Positive" = strict_pos) # fit$print(variables = NULL, "Strictly Positive" = strict_pos) ``` +```{r echo=FALSE} +strict_pos <- function(x) if (all(x > 0)) "yes" else "no" +print.data.frame(fit$summary(variables = NULL, "Strictly Positive" = strict_pos)) +# fit$print(variables = NULL, "Strictly Positive" = strict_pos) +``` -For more information, see [posterior::summarise_draws()], which is is called by `$summary()`. +For more information, see `posterior::summarise_draws()`, which is called by `$summary()`. diff --git a/vignettes/profiling.Rmd b/vignettes/profiling.Rmd index 59323bb1a..1f9eb0dc5 100644 --- a/vignettes/profiling.Rmd +++ b/vignettes/profiling.Rmd @@ -13,7 +13,7 @@ vignette: > %\VignetteEncoding{UTF-8} --- -```{r child="children/settings-knitr.Rmd"} +```{r child="children/_settings-knitr.Rmd"} ``` ## Introduction