Skip to content

Commit

Permalink
Merge pull request #427 from tlverse/devel
Browse files Browse the repository at this point in the history
Merge devel into master
  • Loading branch information
jeremyrcoyle authored Apr 29, 2024
2 parents 6544257 + fdfe83f commit b794bb1
Show file tree
Hide file tree
Showing 359 changed files with 9,497 additions and 44,463 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,3 +30,4 @@ deploy.sh
^LICENSE$
man-roxygen
^_pkgdown\.yml$
^pkgdown$
11 changes: 5 additions & 6 deletions .github/workflows/R-CMD-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,18 @@ jobs:

steps:
- name: Checkout repo
uses: actions/checkout@v2
uses: actions/checkout@v3

- name: Setup R
uses: r-lib/actions/setup-r@master
uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}

- name: Install pandoc
uses: r-lib/actions/setup-pandoc@v1
uses: r-lib/actions/setup-pandoc@v2

- name: Install tinyTeX
uses: r-lib/actions/setup-tinytex@v1
uses: r-lib/actions/setup-tinytex@v2

- name: Install system dependencies
if: runner.os == 'Linux'
Expand All @@ -50,8 +50,7 @@ jobs:
- name: Install package dependencies
run: |
install.packages(c("remotes", "rcmdcheck", "covr", "sessioninfo"))
if(Sys.info()["sysname"] == "Windows") install.packages("igraph", type = "binary")
install.packages(c("remotes", "devtools", "rcmdcheck", "covr", "sessioninfo"))
remotes::install_deps(dependencies = TRUE)
shell: Rscript {0}

Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ README.html
.DS_Store
doc
Meta
docs
18 changes: 9 additions & 9 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: sl3
Title: Pipelines for Machine Learning and Super Learning
Version: 1.4.4
Version: 1.4.5
Authors@R: c(
person("Jeremy", "Coyle", email = "jeremyrcoyle@gmail.com",
role = c("aut", "cre", "cph"),
Expand All @@ -27,9 +27,9 @@ Authors@R: c(
)
Maintainer: Jeremy Coyle <jeremyrcoyle@gmail.com>
Description: A modern implementation of the Super Learner prediction algorithm,
coupled with a general-purpose framework for composing arbitrary pipelines
coupled with a general purpose framework for composing arbitrary pipelines
for machine learning tasks.
Depends: R (>= 3.1.0)
Depends: R (>= 3.6.0)
Imports:
data.table,
assertthat,
Expand All @@ -44,7 +44,6 @@ Imports:
ggplot2,
digest,
Rdpack,
imputeMissings,
dplyr,
caret,
ROCR
Expand All @@ -69,10 +68,9 @@ Suggests:
glmnet,
grf,
gbm,
hal9001 (>= 0.4.0),
hal9001 (>= 0.4.4),
h2o,
keras,
kerasR,
nloptr,
nnls,
randomForest,
Expand All @@ -87,10 +85,12 @@ Suggests:
lightgbm,
dbarts,
gam (>= 1.15.0),
haldensify (>= 0.1.5),
haldensify (>= 0.2.3),
mgcv,
hts,
GA
GA,
SIS,
partykit
Remotes:
github::tlverse/origami,
github::tlverse/hal9001@devel,
Expand All @@ -106,5 +106,5 @@ VignetteBuilder:
knitr,
R.rsp
Roxygen: list(markdown = TRUE, old_usage = TRUE, r6 = FALSE)
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
RdMacros: Rdpack
10 changes: 7 additions & 3 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
# Generated by roxygen2: do not edit by hand

S3method("[",sl3_Task)
export(CV_lrnr_sl)
export(Custom_chain)
export(Lrnr_HarmonicReg)
export(Lrnr_arima)
export(Lrnr_bartMachine)
export(Lrnr_base)
export(Lrnr_bayesglm)
export(Lrnr_bilstm)
export(Lrnr_bound)
export(Lrnr_caret)
export(Lrnr_cv)
Expand All @@ -25,8 +23,11 @@ export(Lrnr_gam)
export(Lrnr_gbm)
export(Lrnr_glm)
export(Lrnr_glm_fast)
export(Lrnr_glm_semiparametric)
export(Lrnr_glmnet)
export(Lrnr_glmtree)
export(Lrnr_grf)
export(Lrnr_grfcate)
export(Lrnr_gru_keras)
export(Lrnr_gts)
export(Lrnr_h2o_classifier)
Expand Down Expand Up @@ -76,6 +77,7 @@ export(Variable_Type)
export(args_to_list)
export(custom_ROCR_risk)
export(customize_chain)
export(cv_sl)
export(debug_predict)
export(debug_train)
export(debugonce_predict)
Expand Down Expand Up @@ -113,6 +115,7 @@ export(pack_predictions)
export(pooled_hazard_task)
export(predict_classes)
export(prediction_plot)
export(process_data)
export(risk)
export(safe_dim)
export(sl3Options)
Expand Down Expand Up @@ -161,19 +164,20 @@ importFrom(ggplot2,geom_point)
importFrom(ggplot2,ggplot)
importFrom(ggplot2,labs)
importFrom(ggplot2,scale_x_discrete)
importFrom(imputeMissings,impute)
importFrom(methods,is)
importFrom(origami,combiner_c)
importFrom(origami,cross_validate)
importFrom(origami,fold_index)
importFrom(origami,folds2foldvec)
importFrom(origami,folds_vfold)
importFrom(origami,id_folds_to_folds)
importFrom(origami,make_folds)
importFrom(origami,training)
importFrom(origami,validation)
importFrom(stats,aggregate)
importFrom(stats,arima)
importFrom(stats,binomial)
importFrom(stats,coef)
importFrom(stats,family)
importFrom(stats,gaussian)
importFrom(stats,glm)
Expand Down
93 changes: 93 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,96 @@
# sl3 1.4.5
* Changed `CV_lrnr_sl` to `cv_sl`
* Added `Lrnr_glmtree`, which uses the `partykit` R package to fit recursive
partitioning and regression trees in a generalized linear model.
* Added fold-specific SL coefficients to the output of `cv_sl`, and removed
the coefficients column from the returned `cv_risk` table.
* Added `get_sl_revere_risk` argument to `Lrnr_sl`'s `cv_risk` method to
provide the option (with default of `FALSE`) to add a super learner's
revere-based risk (not a true cross-validated risk) to `cv_risk` output.
* Changed default metalearner to `Lrnr_nnls` for binary and continuous outcomes.
* Added `cv_control` argument to `Lrnr_sl`, which allows users to define
specific cross-validation structures for fitting the super learner. This is
intended for use in a nested cross-validation scheme (such as cross-validated
super learner, `cv_sl`, or when `Lrnr_sl` is considered in the list of
candidate `learners` in another `Lrnr_sl`). In addition to constructing
clustered cross-validation with respect to `id`, `cv_control` also
can be used to construct stratified cross-validation folds for `Lrnr_sl`.
* `Lrnr_caret` now works for binary and categorical outcomes. Previous versions
stated that these discrete outcome types were supported by `Lrnr_caret`, but
the functionality would break.
* Added public function for `sl3_Task`, `get_folds`, which takes in
`origami::make_folds` arguments and returns the folds. This function is
now called by `task$folds` and it can be called in train as well, to obtain
folds from a task that have a non-default fold structure.
* Learners that use CV internally (i.e., as part of their procedure to select
tuning parameters), including `Lrnr_caret`, `Lrnr_glmnet`, `Lrnr_hal9001`,
and `Lrnr_sl`, use `task$get_folds` to create folds. The learners' folds
respect the default CV fold structure in `sl3` tasks (clustered CV when `id`
is supplied in the task; and stratified CV when outcomes are categorical or
binary, and when `id`s are nested in strata if `id` is supplied to the task). However,
`V` can be modified according to the learner-specific parameters. (`Lrnr_sl`
has a few extra CV tuning arguments, which are thoroughly documented in
`cv_control` and modifications are only recommended for advanced use of
`Lrnr_sl`.)
* Fixed learner parameter `formula` bug, which was causing formulas with "." to
return an empty task, and therefore learners with these formulas to fail.
* Fixed bug in `Lrnr_cv_selector` metalearner, which was using the wrong folds
to calculate the cross-validated risk estimate. This impacted
`Lrnr_cv_selector` when `eval_function` was not a loss function, e.g. AUC.
By calling `task$folds` on the metalearner's training task, we were deriving
folds from the matrix of cross-validated predictions, and not using the folds
for cross-validating the candidates. We now require the folds for cross-
validating the candidates (i.e., the folds in task for training `Lrnr_sl`) to
be supplied when `Lrnr_cv_selector`'s `eval_function` is not a loss function.
* `Lrnr_caret` and `Lrnr_rpart` factor binary outcomes in their `train` methods,
thereby considering a classification prediction problem. To avoid this
behavior and consider a regression prediction problem with a binary outcome
(e.g., to minimize the squared error or negative log likelihood loss in a
binary outcome prediction problem), users can set
`factor_binary_outcome = FALSE` when they instantiate the learner.
* Tasks can be created without an outcome. This comes in handy when creating
a task that is used only for prediction, not for training, and leads
to the task's outcome type being set to "none" if it's not supplied.
* When the variable type of the outcome (i.e., `outcome_type`) is necessary for
a learner's `predict` method (e.g., if categorical outcome predictions need to
be "packed" together), the outcome type in the **training task** should be
used. That is, `private$.training_outcome_type` should be used to obtain
the outcome type in a learner's `predict` method; the task supplied to
`predict` should not be used. The following learners were referring to the
task supplied to `predict` in order to retain the outcome type, and they were
modified to use the training task's outcome type instead: `Lrnr_svm`,
`Lrnr_randomForest`, `Lrnr_ranger`, `Lrnr_rpart`, `Lrnr_polspline`. The
issue with pulling the outcome type from the task supplied to `predict` is
that the outcome type of that task might be "none", if the `outcome` argument
is not supplied to it.
* Updated the learner template (inst/templates/Lrnr_template.R) to reflect the
new formatting guidelines for learner documentation.
* Updated documentation for `sl3_Task` parameters (man-roxygen/sl3_Task_extra.R).
Specifically, `drop_missing_outcome` and `flag` were added; `offset`
description was fixed; description of `folds` was added, including how to
modify it and the default; and description of how the default cross-validation
structure considers `id` and discrete (binary and categorical) outcome types
to construct clustered and stratified cross-validation schemes, respectively,
was added.
* Added documentation for the function `process_data` (R/process_data.R), which
is called when instantiating a task, to process the covariates and identify
missingness in the outcome.
* Added `Lrnr_grfcate`, a prediction function estimator for conditional average
treatment effect (CATE), which uses the `causal_forest` function in `grf`
package. This learner is intended for use in the `tmle3mopttx` package, where
CATE estimation and prediction is required.
* Added flexibility and error handling to optional `sl3_Task` argument
`outcome_type`. Either `"binomial"`, `"binary"` or `binomial()` can be
supplied for a binary outcome; `"continuous"`, `"gaussian"`, or `gaussian()`
for a continuous outcome; `"categorical"`, `"multinomial"`, or `multinomial()`
for a categorical outcome. As before, when `outcome_type` is not supplied, we
will try to detect it from the outcome values. If the supplied `outcome_type`
differs from the detected one, a warning is now thrown. If `outcome_type` is
supplied but invalid, then an error is thrown upon `sl3_Task` instantiation,
as opposed to during learner training.
* Cross-validated super learner (`cv_sl`) returns the cross-validated
predictions for the super learner and its candidates.

# sl3 1.4.4
* Updates to `Lrnr_nnls` to support binary outcomes, including support for
convexity of the resultant model fit and warnings on prediction quality.
Expand Down
24 changes: 0 additions & 24 deletions R/CV_Lrnr_sl.R

This file was deleted.

4 changes: 2 additions & 2 deletions R/Lrnr_arima.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ Lrnr_arima <- R6Class(
if (length(rm_idx) > 0) {
params$xreg <- as.matrix(task$X[, -rm_idx, with = FALSE])
print(paste(c(
"ARIMA requires matrix of external regressors to not be rank ",
"deficient. The following covariates were removed to counter the ",
"ARIMA requires matrix of external regressors to not be rank",
"deficient. The following covariates were removed to counter the",
"linear combinations:", names(task$X)[rm_idx]
), collapse = " "))
} else {
Expand Down
Loading

0 comments on commit b794bb1

Please sign in to comment.