From 224a7ade5a595f8868ea1331a88a00b3d2a13af1 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Fri, 13 Sep 2024 11:08:41 +0200 Subject: [PATCH 1/3] remove R dependency --- Project.toml | 1 + contrasts_kwdyz11.qmd | 13 +++++-------- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/Project.toml b/Project.toml index 4b39ba7..1eb9b26 100644 --- a/Project.toml +++ b/Project.toml @@ -33,6 +33,7 @@ MixedModelsSim = "d5ae56c5-23ca-4a1f-b505-9fc4796fc1fe" PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" RCall = "6f49c342-dc21-5d91-9882-a32aef131414" +RData = "df47a6cb-8c03-5eed-afd8-b6050d6c41da" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RegressionFormulae = "545c379f-4ec2-4339-9aea-38f2fb6a8ba2" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" diff --git a/contrasts_kwdyz11.qmd b/contrasts_kwdyz11.qmd index 06ba77e..434b5bf 100644 --- a/contrasts_kwdyz11.qmd +++ b/contrasts_kwdyz11.qmd @@ -14,7 +14,7 @@ using CairoMakie using Chain using DataFrames using MixedModels -using RCall +using RData: load as rload using SMLP2024: dataset using StatsBase using StatsModels @@ -30,13 +30,13 @@ Many researchers have pointed out that contrasts should be "tested instead of, r For a (quasi-)experimental set of data, there is (or should be) a clear _a priori_ theoretical commitment to specific hypotheses about differences between factor levels and how these differences enter in interactions with other factors. This specification should be used in the first LMM and reported, irrespective of the outcome. If alternative theories lead to alternative _a priori_ contrast specifications, both analyses are justified. If the observed means render the specification completely irrelevant, the comparisons originally planned could still be reported in a Supplement). -In this script, we are working through a large number of different contrasts for the same data. 
The purpose is to introduce both the preprogrammed (“canned”) and the general options to specify hypotheses about main effects and interactions. Obviously, we do not endorse generating a plot of the means and specifying the contrasts accordingly. This is known as the [Texas sharpshooter](https://www.bayesianspectacles.org/origin-of-the-texas-sharpshooter/) fallacy. The link leads to an illustration and brief historical account by Wagenmakers (2018). +In this script, we are working through a large number of different contrasts for the same data. The purpose is to introduce both the preprogrammed ("canned") and the general options to specify hypotheses about main effects and interactions. Obviously, we do not endorse generating a plot of the means and specifying the contrasts accordingly. This is known as the [Texas sharpshooter](https://www.bayesianspectacles.org/origin-of-the-texas-sharpshooter/) fallacy. The link leads to an illustration and brief historical account by Wagenmakers (2018). Irrespective of how results turn out, there is nothing wrong with specifying a set of post-hoc contrasts to gain a better understanding of what the data are trying to tell us. Of course, in an article or report about the study, the _a priori_ and post-hoc nature of contrast specifications must be made clear. Some kind of alpha-level adjustment (e.g., Bonferroni) may be called for, too. And, of course, there are grey zones. -There is quite a bit of statistical literature on contrasts. Two “local” references are @Brehm2022 and @Schad2020. +There is quite a bit of statistical literature on contrasts. Two "local" references are @Brehm2022 and @Schad2020. -For further readings see “Further Readings” in @Schad2020. +For further reading, see "Further Readings" in @Schad2020. 
# Example data {#sec-data} @@ -439,10 +439,7 @@ Three factors: 2 x 3 = 6 measures / subject ```{julia} -R""" -dat2 = readRDS("data/Exp_2x2x3.rds"); -""" -@rget(dat2) +dat2 = load("data/Exp_2x2x3.rds"); ``` We select an LMM supported by the data. From f153a8eecd82d6eec1bd86f55555a2b1a641837c Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Fri, 13 Sep 2024 11:27:52 +0200 Subject: [PATCH 2/3] update datasets --- src/datasets.jl | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/datasets.jl b/src/datasets.jl index 83654d4..3e27d7c 100644 --- a/src/datasets.jl +++ b/src/datasets.jl @@ -2,7 +2,7 @@ _file(x) = joinpath(CACHE[], string(x, ".arrow")) clear_scratchspaces!() = Scratch.clear_scratchspaces!(@__MODULE__) -const datasets = +const DATASETS = CSV.read( IOBuffer( """ @@ -21,6 +21,7 @@ fggk21_Child,c2fmn,1,61c91e00336e6f804e9f6b86986ebb4a14561cc4908b3a21cb27c113d2b fggk21_Score,7fqx3,1,99d73ee705aaf5f4ee696eadbba992d0113ba6f467ce337a62a63853e4617400 kkl15,p8cea,2,90d7bb137c8613d7a15c8597c461aee7c7cb0f0989a07c80fc93e1fbe2e5c156 kwdyz11,4cv52,3,2fa23aa8aa25e1adb10183c8d29646ae0d19d6baef9d711c9906f7fa1b225571 +exp_2x2x3,za9gs,1,cb09684b7373492e849c83f20a071b97f986123677134ac2ddb9ec0dcb32e503 """ ), Table; @@ -29,25 +30,34 @@ kwdyz11,4cv52,3,2fa23aa8aa25e1adb10183c8d29646ae0d19d6baef9d711c9906f7fa1b225571 ) if @isdefined(_cacheddatasets) - empty!(_cacheddatasets) # start from an empty cache in case datasets has changed + empty!(_cacheddatasets) # start from an empty cache in case DATASETS has changed else const _cacheddatasets = Dict{Symbol, Arrow.Table}() end +""" + datasets() + +Return a vector of the names of datasets available for use in [`dataset`](@ref). +""" +function datasets() + return sort!(vcat(SMLP2024.DATASETS.dsname, MixedModelsDatasets.datasets())) +end + """ dataset(name::Union(Symbol, AbstractString)) Return as an `Arrow.Table` the dataset named `name`. 
Available dataset names, their versions, the filenames on the osf.io site and an SHA2 checksum of their contents -are in the table `datasets`. +are in the table `DATASETS`. The files are cached in the scratchspace for this package. The name of this directory is the value of `CACHE[]`. """ function dataset(nm::AbstractString) return get!(_cacheddatasets, Symbol(nm)) do # retrieve from cache if available, otherwise - # check for nm in datasets table first so MMDS can be overridden - rows = filter(==(nm) ∘ getproperty(:dsname), datasets) + # check for nm in DATASETS table first so MMDS can be overridden + rows = filter(==(nm) ∘ getproperty(:dsname), DATASETS) if isempty(rows) nm in MMDS || error("Dataset '$nm' is not available") MixedModelsDatasets.dataset(nm) @@ -58,10 +68,12 @@ function dataset(nm::AbstractString) if ismissing(row.filename) load_quiver() # special-case `ratings` and `movies` else + @info "Downloading dataset..." Downloads.download( string("https://osf.io/", row.filename, "/download?version=", row.version), fnm, ) + @info "done" end end if row.sha2 ≠ bytes2hex(open(sha2_256, fnm)) From 4653a9989d0cc78a6719d157fd0ffedcb2e02034 Mon Sep 17 00:00:00 2001 From: Phillip Alday Date: Fri, 13 Sep 2024 11:48:33 +0200 Subject: [PATCH 3/3] use dataset improvements, remove RChunks --- Project.toml | 1 - contrasts_kwdyz11.qmd | 48 +++++++++++++++---------------------------- data/Exp_2x2x3.rds | 3 --- 3 files changed, 16 insertions(+), 36 deletions(-) delete mode 100644 data/Exp_2x2x3.rds diff --git a/Project.toml b/Project.toml index 1eb9b26..4b39ba7 100644 --- a/Project.toml +++ b/Project.toml @@ -33,7 +33,6 @@ MixedModelsSim = "d5ae56c5-23ca-4a1f-b505-9fc4796fc1fe" PooledArrays = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" RCall = "6f49c342-dc21-5d91-9882-a32aef131414" -RData = "df47a6cb-8c03-5eed-afd8-b6050d6c41da" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" RegressionFormulae = 
"545c379f-4ec2-4339-9aea-38f2fb6a8ba2" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" diff --git a/contrasts_kwdyz11.qmd b/contrasts_kwdyz11.qmd index 434b5bf..3c07811 100644 --- a/contrasts_kwdyz11.qmd +++ b/contrasts_kwdyz11.qmd @@ -10,18 +10,17 @@ fig-format: png ```{julia} #| code-fold: true +using AlgebraOfGraphics using CairoMakie using Chain using DataFrames using MixedModels -using RData: load as rload using SMLP2024: dataset using StatsBase using StatsModels CairoMakie.activate!(; type="png") -import ProgressMeter progress = false ``` @@ -439,7 +438,7 @@ Three factors: 2 x 3 = 6 measures / subject ```{julia} -dat2 = load("data/Exp_2x2x3.rds"); +dat2 = dataset(:exp_2x2x3) ``` We select an LMM supported by the data. @@ -479,35 +478,20 @@ A: A2 & B: B3 0.523243 0.950202 0.55 0.5819 ─────────────────────────────────────────────────────────────────── ``` -The following figure also appears only in interactive chunk execution. The chunk generates also an error when rendered. - -```{r} -#| eval: true - -library(tidyverse) -dat2 = readRDS("data/Exp_2x2x3.rds"); - -cbPalette <- c("#000000", "#E69F00", "#56B4E9", "#009E73", - "#F0E442", "#0072B2", "#D55E00", "#CC79A7") - -tbl1 <- - dat2 |> - group_by(Subj, A, B) |> - reframe(N=n(),dv=mean(dv)) |> - group_by(A, B) |> - reframe(N=n(), dv_M=mean(dv), dv_SD=sd(dv), dv_SE=dv_SD/sqrt(N)) -tbl1 - -fig1 <- - tbl1 |> - ggplot(aes(y=dv_M, x=B, group=A, color=A)) + - geom_point() + - geom_line() + - scale_color_manual(values=cbPalette[2:3]) + - theme_bw() - -print(fig1) -NULL +```{julia} +using Chain +tbl1 = @chain DataFrame(dat2) begin + groupby(_, [:Subj, :A, :B]) + combine(_, nrow => :n, :dv => mean => :dv) + groupby(_, [:A, :B]) + combine(_, + :dv => mean => :dv_M, + :dv => std => :dv_SD, + :dv => sem => :dv_SE) +end + +fig1 = data(tbl1) * mapping(:B, :dv_M; color=:A) * (visual(Lines) + visual(Scatter)) +draw(fig1) ``` diff --git a/data/Exp_2x2x3.rds b/data/Exp_2x2x3.rds deleted file mode 100644 index f85b97d..0000000 --- 
a/data/Exp_2x2x3.rds +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:4d881c704c21f8b30236d88b3af7f761309fd351c0aa91f3f93cba940a34006a -size 3270