WIP: Compression complexity causality #162

Draft · wants to merge 20 commits into `main`
61 changes: 61 additions & 0 deletions docs/src/complexity/compression_complexity.md
@@ -0,0 +1,61 @@
# [Compression complexity](@id compression_complexity)

## Interface

```@docs
compression_complexity
```

## Algorithms

```@docs
EffortToCompress
```

## Example

Below is a reproduction of figure 3 in Nagaraj et al. (2013)[^Nagaraj2013], which compares the (approximate) Lyapunov exponent and the effort-to-compress (ETC) compression complexity of 200-point logistic map time series across a range of bifurcation parameters.

For ETC, each time series is first symbolized to a binary sequence. Lyapunov exponents are estimated directly on the raw (non-symbolized) time series.

```@example
using CausalityTools, Plots, StatsBase, Measures

# Approximation to the Lyapunov exponent for a time series x, from Nagaraj (2013)
function lyapunov(x, a)
    L = length(x)
    lyp = 0.0
    for i in 2:length(x)
        lyp += log(2, abs(a - 2*a*x[i]))
    end
    return lyp / L
end


coeffs = 3.5:0.0001:4.0
ls = zeros(length(coeffs))
etcs = zeros(length(coeffs))
for (i, a) in enumerate(coeffs)
    sys = logistic2_unidir(r₁ = a, c_xy = 0.0, σ = 0.0)
    # Generate time series and compute approximate Lyapunov exponents
    x = trajectory(sys, 200, Ttr = 10000)[:, 1]
    ls[i] = lyapunov(x, a)

    # Symbolize to a binary time series and compute effort-to-compress
    y = [xᵢ > 0.5 ? 1 : 0 for xᵢ in x]
    etcs[i] = compression_complexity(y, EffortToCompress(normalize = false))
end

plot(xlabel = "a",
legend = :topleft,
right_margin = 10mm,
bottom_margin = 5mm)
plot!(coeffs, ls .- mean(ls), label = "Lyapunov exponent", c = :red)
plot!(twinx(), coeffs, etcs, label = "ETC", axis = :right, c = :black)
savefig("compression_complexity_etc.svg") # hide
```

![](compression_complexity_etc.svg)

[^Nagaraj2013]: Nagaraj, N., Balasubramanian, K., & Dey, S. (2013). A new complexity measure for time series analysis and classification. The European Physical Journal Special Topics, 222(3), 847-860.
5 changes: 5 additions & 0 deletions docs/src/complexity/dynamical_complexity.md
@@ -0,0 +1,5 @@
# [Dynamical complexity](@id dynamical_complexity)

```@docs
dynamical_complexity
```
15 changes: 15 additions & 0 deletions docs/src/complexity/interventional_complexity_causality.md
@@ -0,0 +1,15 @@
# Interventional complexity causality (ICC)

## Interface

```@docs
icc
```

## Estimators

```@docs
CompressionComplexityCausalityEstimator
```

## Examples
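
A minimal usage sketch, mirroring the doctest in the `icc` docstring (random binary inputs; the numerical result depends on the RNG seed):

```julia
using CausalityTools, Random
rng = MersenneTwister(1234)
x = rand(rng, 0:1, 500)   # pre-symbolized (binary) target series
y = rand(rng, 0:1, 500)   # pre-symbolized (binary) candidate driver
est = CompressionComplexityCausalityEstimator(
    algorithm = EffortToCompress(normalize = true),
    w = 15, L = 30, step = 10)
icc(x, y, est)            # mean ICC over sliding windows
```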
1 change: 1 addition & 0 deletions src/CausalityTools.jl
@@ -28,6 +28,7 @@ module CausalityTools
include("methods/closeness/closeness.jl")
include("methods/correlation/correlation.jl")
include("methods/recurrence/methods.jl")
include("methods/compression/compression.jl")

include("utils/utils.jl")

@@ -0,0 +1,7 @@
using Reexport

@reexport module InterventionalComplexityCausality
using ..ComplexityMeasures
include("interface.jl")
include("ccc/compression_complexity_causality.jl")
end
42 changes: 42 additions & 0 deletions src/InterventionalComplexityCausality/ccc/ccc_estimators.jl
@@ -0,0 +1,42 @@
export CCCEstimator, CompressionComplexityCausalityEstimator

"""
    CompressionComplexityCausalityEstimator(
        algorithm::CompressionComplexityEstimator = EffortToCompress(normalize = true),
        w::Int = 15,
        L::Int = 30,
        step::Int = 1)

`CompressionComplexityCausalityEstimator` (`CCCEstimator` for short) stores the
parameters used to compute interventional complexity causality (ICC).

## Parameters

- `algorithm`: The algorithm used to compute the compression complexity, e.g. an instance
    of [`EffortToCompress`](@ref).
- `w`: The block size for `Xⁱ`, which is a block of current values of time series `X`.
    The default value is 15, as recommended in Kathpalia & Nagaraj (2019).
- `L`: The block size for `X⁻`, `Y⁻`, `Z⁻`, ..., which are blocks of past values of
    `X`, `Y`, `Z`, and so on, taken from the immediate past of the `Xⁱ` block. The
    default value is `L = 30`, but for real applications, this value should be
    determined as described in table S2 in the supplement of Kathpalia & Nagaraj (2019).
- `step`: ICC is computed over a sliding window of width `w + L`. `step` gives the shift,
    in number of indices, between consecutive windows (``\\delta`` in Kathpalia & Nagaraj,
    2019).

## Input data requirements

All estimators assume that the input time series are *pre-symbolized/binned*,
integer-valued sequences. Thus, the parameter `B` (number of bins) from Kathpalia & Nagaraj
(2019)[^Kathpalia2019] is not present here.
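
A minimal binning sketch (the threshold used here is arbitrary and chosen only for
illustration; any consistent integer encoding works):

```julia
x_raw = rand(1000)                      # real-valued observations
# Binarize around a fixed threshold so the input becomes an
# integer-valued (here: binary) symbol sequence.
x = [xᵢ > 0.5 ? 1 : 0 for xᵢ in x_raw]
```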

See also: [`icc`](@ref), [`EffortToCompress`](@ref).

[^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
"""
Base.@kwdef struct CompressionComplexityCausalityEstimator{ALG} <: InterventionalComplexityCausalityEstimator
algorithm::ALG = EffortToCompress(normalize = true)
w = 15
L = 30
step = 1
end
const CCCEstimator = CompressionComplexityCausalityEstimator
@@ -0,0 +1,28 @@
using Statistics
include("ccc_estimators.jl")

function icc_i(Xⁱ::AbstractVector{J}, X⁻::AbstractVector{J}, Y⁻::AbstractVector{J},
        estimator::CompressionComplexityCausalityEstimator) where {J <: Integer}
    # ICCᵢ = DC(Xⁱ | X⁻) - DC(Xⁱ | X⁻, Y⁻) for a single window.
    return dynamical_complexity(Xⁱ, X⁻, estimator.algorithm) -
        dynamical_complexity(Xⁱ, X⁻, Y⁻, estimator.algorithm)
end

function icc_on_segments(x::AbstractVector{J}, y::AbstractVector{J},
        estimator::CompressionComplexityCausalityEstimator) where {J <: Integer}
    w, L, step = estimator.w, estimator.L, estimator.step
    windows = get_windows(x, w + L, step)
    segment_results = zeros(Float64, length(windows))
    for (i, window) in enumerate(windows)
        # The first L indices of each window are the "past" blocks X⁻/Y⁻;
        # the following w indices are the "current" block Xⁱ.
        past_indices = window[1:L]
        current_indices = window[L + 1]:window[L + w]
        Xⁱ = @views x[current_indices]
        X⁻ = @views x[past_indices]
        Y⁻ = @views y[past_indices]
        segment_results[i] = dynamical_complexity(Xⁱ, X⁻, estimator.algorithm) -
            dynamical_complexity(Xⁱ, X⁻, Y⁻, estimator.algorithm)
    end
    return segment_results
end

function icc(x, y, estimator)
    return Statistics.mean(icc_on_segments(x, y, estimator))
end
71 changes: 71 additions & 0 deletions src/InterventionalComplexityCausality/interface.jl
@@ -0,0 +1,71 @@
export icc,
    interventional_complexity_causality,
    InterventionalComplexityCausalityEstimator

"""
InterventionalComplexityCausalityEstimator

Causality estimators that are based on interventional complexity causality (ICC),
as described in Kathpalia & Nagaraj (2019)[^Kathpalia2019], are subtypes of
`InterventionalComplexityCausalityEstimator`.

[^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
"""
abstract type InterventionalComplexityCausalityEstimator end

"""
    icc(x::Vector{Int}, y::Vector{Int}, estimator)

Interventional complexity causality (ICC) is defined as[^Kathpalia2019] *"the change in the
dynamical complexity of time series `X` when `Xⁱ` is seen to be generated jointly by the
dynamical evolution of both `Y⁻` and `X⁻` as opposed to by the reality of the dynamical
evolution of `X⁻` alone"*, that is

```math
ICC_{Y^{-} \\to X^{i}} = DC(X^{i} | X^{-}) - DC(X^{i} | X^{-}, Y^{-}),
```

where

- DC is [`dynamical_complexity`](@ref).
- `Xⁱ` is a block of current values of `X` (`ΔX` in the original paper),
- `X⁻` is a block of past values of `X`, not overlapping with and immediately before `Xⁱ`, and
- `Y⁻` is a block of values from a time series `Y` taken at the same indices as `X⁻`.

`icc` computes the *mean ICC* across a series of sliding windows over the input time series
`x` and `y`, where the sliding window configuration is dictated by the `estimator`.
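
A schematic of how a single window of width `w + L` is split into past and current blocks
(this mirrors `icc_on_segments` in this PR; the numbers are just the estimator defaults):

```julia
w, L = 15, 30                              # estimator defaults
window = 1:(w + L)                         # indices covered by one sliding window
past_indices = window[1:L]                 # X⁻ and Y⁻ are taken here
current_indices = window[(L + 1):(L + w)]  # Xⁱ is taken here
```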

## Interpretation

If ``ICC_{Y \\to X}`` is statistically zero, then no causal influence from ``Y`` to ``X``
is inferred. If ``ICC_{Y \\to X}`` is statistically different from zero, then ``Y`` is
inferred to dynamically influence ``X``, and a higher magnitude indicates a higher degree
of causation.

``ICC_{Y \\to X}`` can be both positive and negative. For details on interpreting both
signs, see Kathpalia and Nagaraj (2019)[^Kathpalia2019].

## Examples

```jldoctest; setup = :(using CausalityTools)
using Statistics, Random
rng = MersenneTwister(1234)
x = rand(rng, 0:1, 500)
y = rand(rng, 0:1, 500)
est = CompressionComplexityCausalityEstimator(
    algorithm = EffortToCompress(normalize = true),
    w = 15,
    L = 30,
    step = 10)
icc(x, y, est)

# output
0.10375494071146242
```

See also: [`CompressionComplexityCausalityEstimator`](@ref), [`EffortToCompress`](@ref)

[^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
"""
function interventional_complexity_causality end
const icc = interventional_complexity_causality
4 changes: 4 additions & 0 deletions src/interface.jl
@@ -0,0 +1,4 @@
""" The supertype of all time series causality estimators. """
abstract type CausalityEstimator end

include("utils/sliding_window.jl")
1 change: 1 addition & 0 deletions src/methods/compression/compression.jl
@@ -0,0 +1 @@
include("compression_complexity/api.jl")
132 changes: 132 additions & 0 deletions src/methods/compression/compression_complexity/api.jl
@@ -0,0 +1,132 @@
abstract type CompressionComplexityEstimator <: ComplexityEstimator end
include("effort_to_compress/effort_to_compress.jl")


# export compression_complexity
# import ComplexityMeasures: ComplexityMeasure
# abstract type CompressionComplexityMeasure <: ComplexityMeasure end

# """
# # Regular compression complexity

# compression_complexity(x, algorithm) → N

# Compute the compression complexity of the pre-binned/pre-symbolized (integer-valued)
# univariate time series `x` using the given `algorithm`.

# compression_complexity(x::StateSpaceSet, algorithm, alphabet_size::Int) → N

# Multivariate integer time series `x`, given as `StateSpaceSet`s, are first transformed into a
# 1D symbol sequence before computing compression complexity. This transformation assumes
# that each variable `xᵢ ∈ x` was symbolized using the same `alphabet_size`. The resulting
# 1D symbol sequence is (in this implementation) not correct if different alphabet sizes are
# used, so ensure during pre-processing that the same alphabet is used (e.g.
# `alphabet_size = 2` for a binary time series, and `alphabet_size = 5` for a five-box binned
# time series).


# # Examples

# Quantifying the compression complexity of a univariate symbol sequence using
# the [`EffortToCompress`](@ref) algorithm.

# ```jldoctest; setup = :(using CausalityTools)
# compression_complexity([1, 2, 1, 2, 1, 1, 1, 2], EffortToCompress())

# # output
# 5.0
# ```

# Multivariate time series given as [`StateSpaceSet`](@ref)s also work, but must be
# symbolized using the same alphabet.

# ```jldoctest; setup = :(using CausalityTools)
# x = [1, 2, 1, 2, 1, 1, 1, 2]
# y = [2, 1, 2, 2, 2, 1, 1, 2]
# alphabet_size = 2
# compression_complexity(StateSpaceSet(x, y), EffortToCompress(), alphabet_size)

# # output
# 7.0
# ```

# Sliding window estimators automatically handle window creation,
# and return a vector of complexity measures, one computed on each window.

# ```jldoctest; setup = :(using CausalityTools)
# using Random; rng = MersenneTwister(1234);
# x = rand(rng, 0:1, 100)
# alg = EffortToCompress(normalize = false)
# sw = ConstantWidthSlidingWindow(alg, width = 25, step = 15)
# compression_complexity(x, sw)

# # output
# 5-element Vector{Float64}:
# 12.0
# 14.0
# 14.0
# 14.0
# 11.0
# ```

# # Joint compression complexity

# compression_complexity(x::AbstractVector, y::AbstractVector, algorithm) → N
# compression_complexity(x::StateSpaceSet, y::StateSpaceSet, algorithm, ax::Int, ay::Int) → N

# If a second time series `y` is provided, compute the joint compression
# complexity using alphabet size `ax` for `x` and alphabet size `ay` for `y`.

# `x` and `y` must be either both integer-valued vectors, or both `StateSpaceSet`s (potentially
# of different dimensions). If computing the joint compression complexity between a
# univariate time series and a multivariate time series, simply wrap the univariate time
# series in a `StateSpaceSet`.

# # Examples

# Joint compression complexity between two time series:

# ```jldoctest; setup = :(using CausalityTools)
# using Random; rng = MersenneTwister(1234);
# x = rand(rng, 0:2, 100)
# y = rand(rng, 0:2, 100)
# alg = EffortToCompress(normalize = true)
# compression_complexity(x, y, alg)

# # output
# 0.2222222222222222
# ```

# For multivariate time series, we must specify the alphabet size
# for each variable to ensure that results are correct.

# ```jldoctest; setup = :(using CausalityTools, DelayEmbeddings)
# # Multivariate time series X has variables encoded with a 2-letter alphabet,
# x1 = [1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0]
# x2 = [1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0]
# X = StateSpaceSet(x1, x2)

# # Multivariate time series Y has variables encoded with a 3-letter alphabet
# y1 = [1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0]
# y2 = [2, 2, 0, 2, 2, 2, 2, 2, 1, 1, 2]
# y3 = [2, 2, 0, 2, 2, 2, 1, 2, 1, 1, 2]
# Y = StateSpaceSet(y1, y2, y3)

# alg = EffortToCompress(normalize = true)
# compression_complexity(X, Y, alg, 2, 3)

# # output
# 0.9
# ```

# # Returns

# See individual algorithms for details on their return values. A `Vector` of compression
# complexities is returned if a sliding window algorithm is used - one value per window.

# See also: [`EffortToCompress`](@ref), [`EffortToCompressSlidingWindow`](@ref).

# [^Kathpalia2019]: Kathpalia, A., & Nagaraj, N. (2019). Data-based intervention approach for Complexity-Causality measure. PeerJ Computer Science, 5, e196.
# """
# function compression_complexity end