diff --git a/Project.toml b/Project.toml index 02f0de54..2c63fcd7 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TMLE" uuid = "8afdd2fb-6e73-43df-8b62-b1650cd9c8cf" authors = ["Olivier Labayle"] -version = "0.12.2" +version = "0.13.0" [deps] AbstractDifferentiation = "c29ec348-61ec-40c8-8164-b8c60e9d9f3d" @@ -21,6 +21,7 @@ Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +SplitApplyCombine = "03a91e81-4c3e-53e1-a0a4-9c0c8f19dd66" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" TableOperations = "ab02a1b2-a7df-11e8-156e-fb1833f50b87" Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" @@ -55,6 +56,7 @@ TableOperations = "1.2" Tables = "1.6" YAML = "0.4.9" Zygote = "0.6" +SplitApplyCombine = "1.2.2" julia = "1.6, 1.7, 1" [extras] diff --git a/src/TMLE.jl b/src/TMLE.jl index 55048640..7eaac807 100644 --- a/src/TMLE.jl +++ b/src/TMLE.jl @@ -20,6 +20,7 @@ import AbstractDifferentiation as AD using Graphs using MetaGraphsNext using Combinatorics +using SplitApplyCombine # ############################################################################# # EXPORTS @@ -28,7 +29,7 @@ using Combinatorics export SCM, StaticSCM, add_equations!, add_equation!, parents, vertices export CM, ATE, IATE export AVAILABLE_ESTIMANDS -export generateATEs +export generateATEs, generateIATEs export TMLEE, OSE, NAIVE export ComposedEstimand export var, estimate, OneSampleTTest, OneSampleZTest, OneSampleHotellingT2Test,pvalue, confint, emptyIC diff --git a/src/counterfactual_mean_based/estimands.jl b/src/counterfactual_mean_based/estimands.jl index 760a02fd..9a28f94f 100644 --- a/src/counterfactual_mean_based/estimands.jl +++ b/src/counterfactual_mean_based/estimands.jl @@ -231,37 +231,207 @@ unique_non_missing(dataset, colname) = unique(skipmissing(Tables.getcolumn(datas unique_treatment_values(dataset, colnames) =(;(colname => unique_non_missing(dataset, colname) for colname in colnames)...) +get_treatments_contrasts(treatments_unique_values) = [collect(Combinatorics.combinations(treatments_unique_values[T], 2)) for T in keys(treatments_unique_values)] + +function generateComposedEstimandFromContrasts( + constructor, + treatments_levels::NamedTuple{names}, + outcome; + confounders=nothing, + outcome_extra_covariates=(), + freq_table=nothing, + positivity_constraint=nothing + ) where names + treatments_contrasts = get_treatments_contrasts(treatments_levels) + components = [] + for combo ∈ Iterators.product(treatments_contrasts...) + treatments_contrast = [NamedTuple{(:control, :case)}(treatment_control_case) for treatment_control_case ∈ combo] + Ψ = constructor( + outcome=outcome, + treatment_values=NamedTuple{names}(treatments_contrast), + treatment_confounders = confounders, + outcome_extra_covariates=outcome_extra_covariates + ) + if satisfies_positivity(Ψ, freq_table; positivity_constraint=positivity_constraint) + push!(components, Ψ) + end + end + return ComposedEstimand(joint_estimand, Tuple(components)) +end + +GENERATE_DOCSTRING = """ +The components of this estimand are generated from the treatment variables contrasts. +For example, consider two treatment variables T₁ and T₂ each taking three possible values (0, 1, 2). +For each treatment variable, the marginal contrasts are defined by (0 → 1, 1 → 2, 0 → 2), there are thus +3 x 3 = 9 joint contrasts to be generated: + +- (T₁: 0 → 1, T₂: 0 → 1) +- (T₁: 0 → 1, T₂: 1 → 2) +- (T₁: 0 → 1, T₂: 0 → 2) +- (T₁: 1 → 2, T₂: 0 → 1) +- (T₁: 1 → 2, T₂: 1 → 2) +- (T₁: 1 → 2, T₂: 0 → 2) +- (T₁: 0 → 2, T₂: 0 → 1) +- (T₁: 0 → 2, T₂: 1 → 2) +- (T₁: 0 → 2, T₂: 0 → 2) + +# Return + +A `ComposedEstimand` with causal or statistical components. + +# Args + +- `treatments_levels`: A NamedTuple providing the unique levels each treatment variable can take. +- `outcome`: The outcome variable. +- `confounders=nothing`: The generated components will inherit these confounding variables. +If `nothing`, causal estimands are generated. +- `outcome_extra_covariates=()`: The generated components will inherit these `outcome_extra_covariates`. +- `positivity_constraint=nothing`: Only components that pass the positivity constraint are added to the `ComposedEstimand` + +""" + +""" + generateATEs( + treatments_levels::NamedTuple{names}, outcome; + confounders=nothing, + outcome_extra_covariates=(), + freq_table=nothing, + positivity_constraint=nothing + ) where names + +Generate a `ComposedEstimand` of ATEs from the `treatments_levels`. $GENERATE_DOCSTRING + +# Example: + +To generate a causal composed estimand with 3 components: + +```@example +generateATEs((T₁ = (0, 1), T₂=(0, 1, 2)), :Y₁) +``` + +To generate a statistical composed estimand with 9 components: + +```@example +generateATEs((T₁ = (0, 1, 2), T₂=(0, 1, 2)), :Y₁, confounders=[:W₁, :W₂]) +``` +""" +function generateATEs( + treatments_levels::NamedTuple{names}, outcome; + confounders=nothing, + outcome_extra_covariates=(), + freq_table=nothing, + positivity_constraint=nothing + ) where names + return generateComposedEstimandFromContrasts( + ATE, + treatments_levels, + outcome; + confounders=confounders, + outcome_extra_covariates=outcome_extra_covariates, + freq_table=freq_table, + positivity_constraint=positivity_constraint + ) +end + """ - generateATEs(dataset, treatments, outcome; confounders=nothing, outcome_extra_covariates=()) + generateATEs(dataset, treatments, outcome; + confounders=nothing, + outcome_extra_covariates=(), + positivity_constraint=nothing + ) Find all unique values for each treatment variable in the dataset and generate all possible ATEs from these values. """ -function generateATEs(dataset, treatments, outcome; confounders=nothing, outcome_extra_covariates=()) - treatments_unique_values = unique_treatment_values(dataset, treatments) - return generateATEs(treatments_unique_values, outcome; confounders=confounders, outcome_extra_covariates=outcome_extra_covariates) +function generateATEs(dataset, treatments, outcome; + confounders=nothing, + outcome_extra_covariates=(), + positivity_constraint=nothing + ) + treatments_levels = unique_treatment_values(dataset, treatments) + freq_table = positivity_constraint !== nothing ? frequency_table(dataset, keys(treatments_levels)) : nothing + return generateATEs( + treatments_levels, + outcome; + confounders=confounders, + outcome_extra_covariates=outcome_extra_covariates, + freq_table=freq_table, + positivity_constraint=positivity_constraint + ) end """ - generateATEs(treatments_unique_values, outcome; confounders=nothing, outcome_extra_covariates=()) + generateIATEs( + treatments_levels::NamedTuple{names}, outcome; + confounders=nothing, + outcome_extra_covariates=(), + freq_table=nothing, + positivity_constraint=nothing + ) where names + +Generates a `ComposedEstimand` of Average Interation Effects from `treatments_levels`. $GENERATE_DOCSTRING + +# Example: + +To generate a causal composed estimand with 3 components: -Generate all possible ATEs from the `treatments_unique_values`. +```@example +generateIATEs((T₁ = (0, 1), T₂=(0, 1, 2)), :Y₁) +``` + +To generate a statistical composed estimand with 9 components: + +```@example +generateIATEs((T₁ = (0, 1, 2), T₂=(0, 1, 2)), :Y₁, confounders=[:W₁, :W₂]) +``` """ -function generateATEs(treatments_unique_values, outcome; confounders=nothing, outcome_extra_covariates=()) - treatments = Tuple(Symbol.(keys(treatments_unique_values))) - treatments_control_case = [collect(Combinatorics.combinations(treatments_unique_values[T], 2)) for T in treatments] - - ATEs = [] - for combo ∈ Iterators.product(treatments_control_case...) - treatments_control_case = [NamedTuple{(:control, :case)}(treatment_control_case) for treatment_control_case ∈ combo] - push!( - ATEs, - ATE( - outcome=outcome, - treatment_values=NamedTuple{treatments}(treatments_control_case), - treatment_confounders = confounders, - outcome_extra_covariates=outcome_extra_covariates - ) - ) - end - return ATEs -end \ No newline at end of file +function generateIATEs( + treatments_levels::NamedTuple{names}, outcome; + confounders=nothing, + outcome_extra_covariates=(), + freq_table=nothing, + positivity_constraint=nothing + ) where names + return generateComposedEstimandFromContrasts( + IATE, + treatments_levels, + outcome; + confounders=confounders, + outcome_extra_covariates=outcome_extra_covariates, + freq_table=freq_table, + positivity_constraint=positivity_constraint + ) +end + +""" + generateIATEs(dataset, treatments, outcome; + confounders=nothing, + outcome_extra_covariates=(), + positivity_constraint=nothing + ) + +Finds treatments levels from the dataset and generates a `ComposedEstimand` of Average Interation Effects from them +(see [`generateIATEs(treatments_levels, outcome; confounders=nothing, outcome_extra_covariates=())`](@ref)). +""" +function generateIATEs(dataset, treatments, outcome; + confounders=nothing, + outcome_extra_covariates=(), + positivity_constraint=nothing + ) + treatments_levels = unique_treatment_values(dataset, treatments) + freq_table = positivity_constraint !== nothing ? frequency_table(dataset, keys(treatments_levels)) : nothing + return generateIATEs( + treatments_levels, + outcome; + confounders=confounders, + outcome_extra_covariates=outcome_extra_covariates, + freq_table=freq_table, + positivity_constraint=positivity_constraint + ) +end + +joint_levels(Ψ::StatisticalIATE) = Iterators.product(values(Ψ.treatment_values)...) + +joint_levels(Ψ::StatisticalATE) = + (Tuple(Ψ.treatment_values[T][c] for T ∈ keys(Ψ.treatment_values)) for c in (:case, :control)) + +joint_levels(Ψ::StatisticalCM) = (values(Ψ.treatment_values),) diff --git a/src/utils.jl b/src/utils.jl index a588cebd..91b12699 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -74,4 +74,24 @@ default_models(;Q_binary=LinearBinaryClassifier(), Q_continuous=LinearRegressor( G_default = G ) -is_binary(dataset, columnname) = Set(skipmissing(Tables.getcolumn(dataset, columnname))) == Set([0, 1]) \ No newline at end of file +is_binary(dataset, columnname) = Set(skipmissing(Tables.getcolumn(dataset, columnname))) == Set([0, 1]) + +function satisfies_positivity(Ψ, freq_table; positivity_constraint=0.01) + for jointlevel in joint_levels(Ψ) + if !haskey(freq_table, jointlevel) || freq_table[jointlevel] < positivity_constraint + return false + end + end + return true +end + +satisfies_positivity(Ψ, freq_table::Nothing; positivity_constraint=nothing) = true + +function frequency_table(dataset, colnames) + iterator = zip((Tables.getcolumn(dataset, colname) for colname in sort(collect(colnames)))...) + counts = groupcount(x -> x, iterator) + for key in keys(counts) + counts[key] /= nrows(dataset) + end + return counts +end \ No newline at end of file diff --git a/test/counterfactual_mean_based/estimands.jl b/test/counterfactual_mean_based/estimands.jl index 6bce0683..b6bed086 100644 --- a/test/counterfactual_mean_based/estimands.jl +++ b/test/counterfactual_mean_based/estimands.jl @@ -202,6 +202,12 @@ end @test Ψreconstructed == Ψ end +@testset "Test control_case_settings" begin + treatments_unique_values = (T₁=(1, 0, 2),) + @test TMLE.get_treatments_contrasts(treatments_unique_values) == [[[1, 0], [1, 2], [0, 2]]] + treatments_unique_values = (T₁=(1, 0, 2), T₂=["AC", "CC"]) + @test TMLE.get_treatments_contrasts(treatments_unique_values) == [[[1, 0], [1, 2], [0, 2]], [["AC", "CC"]]] +end @testset "Test generateATEs" begin dataset = ( T₁=[0, 1, 2, missing], @@ -213,23 +219,74 @@ end Y₂ = [1, 2, 3, 4] ) # No confounders, 1 treatment, no extra covariate: 3 causal ATEs - ATEs = generateATEs(dataset, [:T₁], :Y₁) - @test ATEs == [ + composedATE = generateATEs(dataset, [:T₁], :Y₁) + @test composedATE.args == ( TMLE.CausalATE(:Y₁, (T₁ = (case = 1, control = 0),)), TMLE.CausalATE(:Y₁, (T₁ = (case = 2, control = 0),)), TMLE.CausalATE(:Y₁, (T₁ = (case = 2, control = 1),)) - ] + ) # 2 treatments - ATEs = generateATEs(dataset, [:T₁, :T₂], :Y₁; + composedATE = generateATEs(dataset, [:T₁, :T₂], :Y₁; confounders=[:W₁, :W₂], outcome_extra_covariates=[:C] ) - # 9 expected different treatment settings - @test length(ATEs) == 9 - @test length(unique([Ψ.treatment_values for Ψ in ATEs])) == 9 - # Covariates and confounders - @test all(Ψ.outcome_extra_covariates == (:C,) for Ψ in ATEs) - @test all(Ψ.treatment_confounders == (T₁ = (:W₁, :W₂), T₂ = (:W₁, :W₂)) for Ψ in ATEs) + ## 9 expected different treatment settings + @test length(composedATE.args) == 9 + @test length(unique([Ψ.treatment_values for Ψ in composedATE.args])) == 9 + ## Covariates and confounders + @test all(Ψ.outcome_extra_covariates == (:C,) for Ψ in composedATE.args) + @test all(Ψ.treatment_confounders == (T₁ = (:W₁, :W₂), T₂ = (:W₁, :W₂)) for Ψ in composedATE.args) + + # positivity constraint + composedATE = generateATEs(dataset, [:T₁, :T₂], :Y₁; + confounders=[:W₁, :W₂], + outcome_extra_covariates=[:C], + positivity_constraint=0.1 + ) + @test length(composedATE.args) == 1 + +end + +@testset "Test generateIATEs" begin + dataset = ( + T₁ = [0, 1, 2, missing], + T₂ = ["AC", "CC", missing, "AA"], + W₁ = [1, 2, 3, 4], + W₂ = [1, 2, 3, 4], + C = [1, 2, 3, 4], + Y₁ = [1, 2, 3, 4], + Y₂ = [1, 2, 3, 4] + ) + # From dataset + composedIATE = generateIATEs(dataset, [:T₁, :T₂], :Y₁, + confounders=[:W₁], + outcome_extra_covariates=[:C] + ) + @test composedIATE isa ComposedEstimand + ## 3 x 3 = 9 expected components + @test length(composedIATE.args) == 9 + for arg ∈ composedIATE.args + @test arg isa TMLE.StatisticalIATE + @test arg.treatment_confounders == (T₁=(:W₁,), T₂=(:W₁,)) + @test arg.outcome_extra_covariates == (:C,) + end + # From unique values + composedIATE = generateIATEs((T₁ = (0, 1), T₂=(0, 1, 2)), :Y₁) + @test composedIATE isa ComposedEstimand + ## 1 x 3 expected components + @test length(composedIATE.args) == 1*3 + for arg ∈ composedIATE.args + @test arg isa TMLE.CausalIATE + end + + # positivity constraint + composedIATE = generateIATEs(dataset, [:T₁, :T₂], :Y₁, + confounders=[:W₁], + outcome_extra_covariates=[:C], + positivity_constraint=0.1 + ) + @test length(composedIATE.args) == 0 + end diff --git a/test/utils.jl b/test/utils.jl index 76725fd7..db4caa22 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -51,6 +51,100 @@ end @test !isordered(cfT.T₂) end +@testset "Test positivity_constraint" begin + dataset = ( + A = [1, 1, 0, 1, 0, 2, 2, 1], + B = ["AC", "CC", "AA", "AA", "AA", "AA", "AA", "AA"] + ) + # One variable + frequency_table = TMLE.frequency_table(dataset, [:A]) + @test frequency_table[(0,)] == 0.25 + @test frequency_table[(1,)] == 0.5 + @test frequency_table[(2,)] == 0.25 + + Ψ = CM( + outcome = :toto, + treatment_values = (A=1,), + treatment_confounders = (A=[],) + ) + @test TMLE.joint_levels(Ψ) == ((1,),) + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.4) == true + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.6) == false + + Ψ = ATE( + outcome = :toto, + treatment_values= (A = (case=1, control=0),), + treatment_confounders = (A=[],) + ) + @test collect(TMLE.joint_levels(Ψ)) == [(1,), (0,)] + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.2) == true + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.3) == false + + # Two variables + ## Treatments are sorted: [:B, :A] -> [:A, :B] + frequency_table = TMLE.frequency_table(dataset, [:B, :A]) + @test frequency_table[(1, "CC")] == 0.125 + @test frequency_table[(1, "AA")] == 0.25 + @test frequency_table[(0, "AA")] == 0.25 + @test frequency_table[(1, "AC")] == 0.125 + @test frequency_table[(2, "AA")] == 0.25 + + Ψ = CM( + outcome = :toto, + treatment_values = (B = "CC", A = 1), + treatment_confounders = (B = [], A = []) + ) + @test TMLE.joint_levels(Ψ) == ((1, "CC"),) + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.1) == true + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.15) == false + + Ψ = ATE( + outcome = :toto, + treatment_values = (B=(case="AA", control="AC"), A=(case=1, control=1),), + treatment_confounders = (B = (), A = (),) + ) + @test collect(TMLE.joint_levels(Ψ)) == [(1, "AA"), (1, "AC")] + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.1) == true + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.2) == false + + Ψ = IATE( + outcome = :toto, + treatment_values = (B=(case="AC", control="AA"), A=(case=1, control=0),), + treatment_confounders = (B=(), A=()), + ) + @test collect(TMLE.joint_levels(Ψ)) == [ + (1, "AC") (1, "AA") + (0, "AC") (0, "AA")] + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=1.) == false + + frequency_table = Dict( + (1, "CC") => 0.125, + (1, "AA") => 0.25, + (0, "AA") => 0.25, + (0, "AC") => 0.25, + (1, "AC") => 0.125, + (2, "AA") => 0.25 + ) + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.3) == false + @test TMLE.satisfies_positivity(Ψ, frequency_table, positivity_constraint=0.1) == true + + Ψ = IATE( + outcome = :toto, + treatment_values = (B=(case="AC", control="AA"), A=(case=1, control=0), C=(control=0, case=2)), + treatment_confounders = (B=(), A=(), C=()) + ) + expected_joint_levels = Set([ + (1, "AC", 0), + (0, "AC", 0), + (1, "AA", 0), + (0, "AA", 0), + (1, "AC", 2), + (0, "AC", 2), + (1, "AA", 2), + (0, "AA", 2)]) + @test expected_joint_levels == Set(TMLE.joint_levels(Ψ)) +end + end; true \ No newline at end of file