diff --git a/DESCRIPTION b/DESCRIPTION index 9d93cd0c..d21108fa 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: FFTrees Type: Package Title: Generate, Visualise, and Evaluate Fast-and-Frugal Decision Trees Version: 2.0.0.9000 -Date: 2024-07-15 +Date: 2024-07-20 Authors@R: c(person("Nathaniel", "Phillips", role = c("aut"), email = "Nathaniel.D.Phillips.is@gmail.com", comment = c(ORCID = "0000-0002-8969-7013")), person("Hansjoerg", "Neth", role = c("aut", "cre"), email = "h.neth@uni.kn", comment = c(ORCID = "0000-0001-5427-3141")), person("Jan", "Woike", role = "aut", comment = c(ORCID = "0000-0002-6816-121X")), diff --git a/tests/testthat/test_09_cost.R b/tests/testthat/test_09_cost.R index 7b6b0cfb..1b07a710 100644 --- a/tests/testthat/test_09_cost.R +++ b/tests/testthat/test_09_cost.R @@ -1,4 +1,4 @@ -context("Costs work") +context("Verify costs work") test_that("Using goal = 'cost' kills a high cost cue", { @@ -28,6 +28,7 @@ test_that("Using goal = 'cost' kills a high cost cue", { }) + test_that("Changing costs without changing goal does NOT affect FFT creation", { # Create FFTs with outcome costs 1 for goal 'bacc': @@ -56,6 +57,7 @@ test_that("Changing costs without changing goal does NOT affect FFT creation", { }) + test_that("Changing costs and goal = 'cost' DOES affect FFT creation", { # Create FFTs with outcome costs and goal 'cost': @@ -83,4 +85,5 @@ test_that("Changing costs and goal = 'cost' DOES affect FFT creation", { }) + # eof. diff --git a/tests/testthat/test_10_tree_definitions.R b/tests/testthat/test_10_tree_definitions.R new file mode 100644 index 00000000..1ce5c7e2 --- /dev/null +++ b/tests/testthat/test_10_tree_definitions.R @@ -0,0 +1,128 @@ +context("Get, edit, and use tree.definitions") + +# Create new FFTs from edited tree.definitions: + +test_that("Can get, edit, collect, and create FFTs from tree.definitions", { + + # 1. 
Create an FFTrees object x (for iris data): ------ + + x <- FFTrees(formula = virginica ~ ., + data = iris.v, + main = "Iris virginica", + decision.labels = c("Not-Vir", "Vir"), + quiet = TRUE) + + + + # 2. Extract/get tree definitions: ------ + + # Get tree definitions of x (as 1 non-tidy df): + + tree_dfs <- get_fft_df(x) + + # tree_dfs # 6 tree definitions + + + + # 3. Extract individual tree definitions: ------ + + # Get/read specific trees (each tree as 1 tidy df): + fft_1 <- read_fft_df(ffts_df = tree_dfs, tree = 1) + fft_3 <- read_fft_df(ffts_df = tree_dfs, tree = 3) + + + + # 4. Edit individual tree definitions: ------ + + # Reorder nodes: + my_fft_1 <- reorder_nodes(fft = fft_1, order = c(2, 1), quiet = TRUE) # reverse cues + my_fft_2 <- reorder_nodes(fft = fft_3, order = c(2, 1, 3), quiet = TRUE) # no new exit node + my_fft_3 <- reorder_nodes(fft = fft_3, order = c(1, 3, 2), quiet = TRUE) # new exit node + + # Flip exits: + my_fft_4 <- flip_exits(my_fft_1, nodes = 1, quiet = TRUE) # flip exits of node 1 + my_fft_5 <- flip_exits(my_fft_2, nodes = c(1, 2, 3), quiet = TRUE) # flip only exits of node 1 and 2 + + # Drop nodes: + my_fft_1 <- drop_nodes(my_fft_1, nodes = 2, quiet = TRUE) # drop exit node + my_fft_2 <- drop_nodes(my_fft_2, nodes = 2, quiet = TRUE) # drop non-exit node + + # Edit nodes: + my_fft_3 <- edit_nodes(my_fft_3, # edit 2 nodes: + nodes = c(1, 2), + direction = c("<", "<="), + threshold = c(4.5, 5.5), + exit = c(1, 0), + quiet = TRUE) + + # Add nodes: + my_fft_4 <- add_nodes(my_fft_4, nodes = 2, class = "n", cue = "sep.len", direction = "<=", threshold = "5", exit = 0, quiet = TRUE) # new 2nd node + my_fft_5 <- add_nodes(my_fft_5, nodes = 4, class = "n", cue = "sep.len", direction = ">", threshold = "5", exit = .5, quiet = TRUE) # new final node + + + + # 5. 
Convert and add/collect/gather tree definitions: ------ + + # Write FFT definition (into 1 non-tidy df): + my_tree_dfs <- write_fft_df(my_fft_1, tree = 1) + + # Add other trees (using pipes): + my_tree_dfs <- my_fft_2 |> write_fft_df(tree = 2) |> add_fft_df(my_tree_dfs) + my_tree_dfs <- my_fft_3 |> write_fft_df(tree = 3) |> add_fft_df(my_tree_dfs) + my_tree_dfs <- my_fft_4 |> write_fft_df(tree = 4) |> add_fft_df(my_tree_dfs) + my_tree_dfs <- my_fft_5 |> write_fft_df(tree = 5) |> add_fft_df(my_tree_dfs) + + # my_tree_dfs # => 5 new tree definitions + + + # Add the set of 5 new trees to 6 original ones (re-numbering new ones): + all_fft_dfs <- add_fft_df(my_tree_dfs, tree_dfs) + # all_fft_dfs # => 6 old and 5 new trees = 11 trees + + + + # 6. Apply new tree.definitions to data: ------ + + + # a: Evaluate new tree.definitions for an existing FFTrees object x: + + y <- FFTrees(object = x, # existing FFTrees object x + tree.definitions = all_fft_dfs, # set of all FFT definitions + main = "Iris 2", # new label + quiet = TRUE + ) + + + # b: Create a new FFTrees object z (using formula and original data): + + z <- FFTrees(formula = virginica ~ ., + data = iris.v, # using original data + tree.definitions = all_fft_dfs, # set of all FFT definitions + main = "Iris 2", # new label + quiet = TRUE + ) + + + + # 7. Compare results: ------ + + # summary(y) + # summary(z) + + # all.equal(y, z) + + # # Note: Tree #11 is remarkably bad (bacc = 11%). + # plot(z, tree = 11) + + + + # 8. Tests: ------ + + testthat::expect_is(y, "FFTrees") + testthat::expect_is(z, "FFTrees") + + +}) + + +# eof. 
diff --git a/tests/testthat/test_10_NA_data.R b/tests/testthat/test_11_NA_data.R similarity index 84% rename from tests/testthat/test_10_NA_data.R rename to tests/testthat/test_11_NA_data.R index ca454eeb..6214de19 100644 --- a/tests/testthat/test_10_NA_data.R +++ b/tests/testthat/test_11_NA_data.R @@ -1,6 +1,6 @@ context("Handle NA data") -# NA values in predictors: +# Create FFTs when data has NA values in different types of predictors: test_that("FFTrees works with NA values in categorical predictors", { @@ -12,8 +12,8 @@ test_that("FFTrees works with NA values in categorical predictors", { # Main: Create an FFTrees object: fft_NA_1 <- FFTrees(crit ~ ., - data = data_NA_categorical, - quiet = TRUE) + data = data_NA_categorical, + quiet = TRUE) testthat::expect_is(fft_NA_1, "FFTrees") @@ -31,8 +31,8 @@ test_that("FFTrees works with NA values in 2 numeric predictors", { # Create an FFTrees object: fft_NA_2 <- FFTrees(crit ~ ., - data = data_NA_numeric, - quiet = TRUE) + data = data_NA_numeric, + quiet = TRUE) testthat::expect_is(fft_NA_2, "FFTrees") diff --git a/vignettes/FFTrees_examples.Rmd b/vignettes/FFTrees_examples.Rmd index 016aadf7..af97ba52 100644 --- a/vignettes/FFTrees_examples.Rmd +++ b/vignettes/FFTrees_examples.Rmd @@ -156,6 +156,7 @@ plot(mushrooms_ring_fft, data = "test") As we can see, this tree (in `mushrooms_ring_fft`) has both sensitivity and specificity values of around\ $80$%, but does not perform as well as our earlier one (in `mushrooms_fft`). This suggests that we should discard the expert's advice and primarily rely on the\ `odor` and\ `sporepc` cues. 
+ ### Iris.v data ```{r iris-image, fig.align = "center", out.width = "225px", echo = FALSE} @@ -170,7 +171,7 @@ In this example, we'll create trees using the entire dataset (without splitting # Create FFTrees object for iris data: iris_fft <- FFTrees(formula = virginica ~., data = iris.v, - main = "Iris", + main = "Iris virginica", decision.labels = c("Not-Vir", "Vir")) ``` @@ -187,6 +188,7 @@ summary(iris_fft) # summarize FFTrees object However, let's first take a look at the individual training cue accuracies... + #### Visualizing cue accuracies We can plot the training cue accuracies during training by specifying `what = "cues"`: @@ -199,6 +201,7 @@ plot(iris_fft, what = "cues") It looks like the two cues\ `pet.len` and\ `pet.wid` are the best predictors for this dataset. Based on this insight, we should expect the final trees will likely use one or both of these cues. + #### Visualizing FFT performance Now let's visualize the best tree: @@ -211,6 +214,7 @@ plot(iris_fft) Indeed, it turns out that the best tree only uses the\ `pet.len` and\ `pet.wid` cues (in that order). For this data, the fitted tree exhibits a performance with a sensitivity of\ 100% and a specificity of\ 94%. + #### Viewing alternative FFTs Now, this tree did quite well, but what if someone wanted a tree with the lowest possible false alarm rate? diff --git a/vignettes/FFTrees_mytree.Rmd b/vignettes/FFTrees_mytree.Rmd index 12bd921f..a72e3ec2 100644 --- a/vignettes/FFTrees_mytree.Rmd +++ b/vignettes/FFTrees_mytree.Rmd @@ -288,9 +288,9 @@ plot(fft_4, n.per.icon = 50, what = "all", show.iconguide = TRUE) # Overall accuracy is 10% above baseline (predicting False for all cases). ``` - + - + @@ -364,7 +364,7 @@ When looking at **Figure\ 3**, we first move down on the right side (from retrie We illustrate a typical workflow by redefining some FFTs that were built in the [Tutorial: FFTs for heart disease](FFTrees_heart.html) and evaluating them on the (full) `heartdisease` data. 
-To obtain a set of existing tree definitions, we use our default algorithms to create an `FFTrees` object\ `x`: +To obtain a set of existing tree definitions, we use our default algorithm to create an `FFTrees` object\ `x`: ```{r fft-treedef-01, message = FALSE} # Create an FFTrees object x: @@ -401,7 +401,7 @@ Alternatively, we can use the `get_fft_df()` utility function on\ `x` to obtain (tree_dfs <- get_fft_df(x)) ``` -The resulting R object\ `tree_dfs` is a data frame with `r ncol(tree_dfs)` variables. +The resulting R object\ `tree_dfs` is a data frame with `r ncol(tree_dfs)`\ variables. Each of its `r nrow(tree_dfs)` rows defines an FFT in the context of our current `FFTrees` object\ `x` (see the vignette on [Creating FFTs with FFTrees()](FFTrees_function.html) for help on interpreting tree definitions). As the "ifan" algorithm responsible for creating these trees yields a family of highly similar FFTs (which vary only by their exits, and may truncate some cues), we may want to explore alternative versions of these trees. @@ -482,6 +482,7 @@ For instance, the tree definition with a signal exit at the first node of `my_ff (my_fft_4 <- flip_exits(my_fft_1, nodes = c(1, 2))) ``` + #### Using **magrittr** pipes to combine steps The tree conversion and editing functions do not need to be used separately. @@ -569,8 +570,8 @@ When using the main `FFTrees()` function with a set of `tree.definitions` (as a Importantly, however, the input of `tree.definitions` prevents the generation of new FFTs (via the "ifan" or "dfan" algorithms) and instead evaluates the FFT definitions provided on the data specified:^[If the `tree.definitions` contain cue variables or values that cannot be found in the data, this will result in errors.] 
```{r use-tree-definitions-01} -# Evaluate tree.definitions for an existing FFTrees object y: -y <- FFTrees(object = x, # an existing FFTrees object +# Evaluate new tree.definitions for an existing FFTrees object x: +y <- FFTrees(object = x, # existing FFTrees object x tree.definitions = my_tree_dfs, # new set of FFT definitions main = "Heart Disease 2" # new label )