From a5b4812a1fe163ce66f7137f17cb009a81cf1e63 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 25 Apr 2023 13:02:32 +0200 Subject: [PATCH 001/134] update conditional targets --- doubleml/_utils.py | 6 ++++++ doubleml/double_ml_cvar.py | 5 ++--- doubleml/double_ml_irm.py | 10 +++------- doubleml/double_ml_lpq.py | 13 ++++++------- doubleml/double_ml_pq.py | 5 ++--- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/doubleml/_utils.py b/doubleml/_utils.py index e2d450f2..c95d92e7 100644 --- a/doubleml/_utils.py +++ b/doubleml/_utils.py @@ -371,3 +371,9 @@ def abs_ipw_score(theta): method='brent') ipw_est = res.x return ipw_est + + +def _cond_targets(target, cond_sample): + cond_target = target.astype(float) + cond_target[np.invert(cond_sample)] = np.nan + return cond_target diff --git a/doubleml/double_ml_cvar.py b/doubleml/double_ml_cvar.py index 6958b6b2..31f446ea 100644 --- a/doubleml/double_ml_cvar.py +++ b/doubleml/double_ml_cvar.py @@ -8,7 +8,7 @@ from .double_ml_score_mixins import LinearScoreMixin from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _check_contains_iv, \ _check_zero_one_treatment, _check_quantile, _check_treatment, _check_trimming, _check_score, \ - _normalize_ipw, _dml_tune, _get_bracket_guess, _solve_ipw_score + _normalize_ipw, _dml_tune, _get_bracket_guess, _solve_ipw_score, _cond_targets from .double_ml_data import DoubleMLData from ._utils_resampling import DoubleMLResampling @@ -295,8 +295,7 @@ def ipw_score(theta): m_hat['targets'] = d # set the target for g to be a float and only relevant values - g_hat['targets'] = g_hat['targets'].astype(float) - g_hat['targets'][d != self.treatment] = np.nan + g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment)) if return_models: g_hat['models'] = fitted_models['ml_g'] diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index edb43cc1..063d2d6b 100644 --- 
a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -11,7 +11,7 @@ from .double_ml_score_mixins import LinearScoreMixin from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _check_finite_predictions, _check_is_propensity, \ - _trimm, _normalize_ipw + _trimm, _normalize_ipw, _cond_targets class DoubleMLIRM(LinearScoreMixin, DoubleML): @@ -212,9 +212,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat0['targets'] = g_hat0['targets'].astype(float) - g_hat0['targets'][d == 1] = np.nan + g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat0['preds']) == 'binary') @@ -231,9 +229,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat1['targets'] = g_hat1['targets'].astype(float) - g_hat1['targets'][d == 0] = np.nan + g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat1['preds']) == 'binary') diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 658bfe13..97aebb23 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -9,7 +9,7 @@ from .double_ml_score_mixins import NonLinearScoreMixin from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _check_zero_one_treatment, _check_score,\ _check_trimming, _check_quantile, _check_treatment, _get_bracket_guess, _default_kde, 
_normalize_ipw, _dml_tune, \ - _solve_ipw_score + _solve_ipw_score, _cond_targets from .double_ml_data import DoubleMLData from ._utils_resampling import DoubleMLResampling @@ -423,15 +423,14 @@ def ipw_score(theta): # save targets and models m_z_hat['targets'] = z + # set targets to relevant subsample - g_du_z0_hat['targets'][z == 1] = np.nan - g_du_z1_hat['targets'][z == 0] = np.nan + g_du_z0_hat['targets'] = _cond_targets(g_du_z0_hat['targets'], cond_sample=(z == 0)) + g_du_z1_hat['targets'] = _cond_targets(g_du_z1_hat['targets'], cond_sample=(z == 1)) # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - m_d_z0_hat['targets'][z == 0] = d[z == 0] - m_d_z0_hat['targets'][z == 1] = np.nan - m_d_z1_hat['targets'][z == 1] = d[z == 1] - m_d_z1_hat['targets'][z == 0] = np.nan + m_d_z0_hat['targets'] = _cond_targets(d, cond_sample=(z == 0)) + m_d_z1_hat['targets'] = _cond_targets(d, cond_sample=(z == 1)) if return_models: m_z_hat['models'] = fitted_models['ml_m_z'] diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index 0bcd5b64..e59e2dda 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -8,7 +8,7 @@ from .double_ml_score_mixins import NonLinearScoreMixin from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _check_contains_iv, \ _check_zero_one_treatment, _check_quantile, _check_treatment, _check_trimming, _check_score, _get_bracket_guess, \ - _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score + _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score, _cond_targets from .double_ml_data import DoubleMLData from ._utils_resampling import DoubleMLResampling @@ -337,8 +337,7 @@ def ipw_score(theta): m_hat['targets'] = d # set the target for g to be a float and only relevant values - g_hat['targets'] = g_hat['targets'].astype(float) - g_hat['targets'][d != self.treatment] = np.nan + g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment)) if 
return_models: g_hat['models'] = fitted_models['ml_g'] From a7b46288e8c02edfc1d31de809b7fa4ce8cfb4ff Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 25 Apr 2023 13:03:09 +0200 Subject: [PATCH 002/134] update fit method to supply a dict --- doubleml/double_ml.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 1e37cb5c..7ce819b5 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -458,7 +458,7 @@ def __psi_deriv(self): def __all_se(self): return self._all_se[self._i_treat, self._i_rep] - def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False): + def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply_predictions=None): """ Estimate DoubleML models. @@ -477,6 +477,11 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False): to analyze the fitted models or extract information like variable importance. Default is ``False``. + supply_predictions : None or dict + If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions + for a specific learner is supplied, the model will use the supplied nuisance predictions instead. + Default is `None`. + Returns ------- self : object @@ -495,6 +500,11 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False): raise TypeError('store_models must be True or False. ' f'Got {str(store_models)}.') + if supply_predictions is not None: + if not isinstance(supply_predictions, dict): + raise TypeError('The predictions must be a dictionary. 
' + f'{str(supply_predictions)} of type {str(type(supply_predictions))} was passed.') + # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() From 8b5e77821fd4542359d0015930ed26243680d632 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 11:49:41 +0200 Subject: [PATCH 003/134] checks on supplied_predictions --- doubleml/double_ml.py | 55 +++++++++++++-- doubleml/tests/test_doubleml_exceptions.py | 78 ++++++++++++++++++++++ 2 files changed, 127 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 7ce819b5..4422cef6 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -458,7 +458,7 @@ def __psi_deriv(self): def __all_se(self): return self._all_se[self._i_treat, self._i_rep] - def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply_predictions=None): + def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supplied_predictions=None): """ Estimate DoubleML models. @@ -477,7 +477,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply to analyze the fitted models or extract information like variable importance. Default is ``False``. - supply_predictions : None or dict + supplied_predictions : None or dict If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions for a specific learner is supplied, the model will use the supplied nuisance predictions instead. Default is `None`. @@ -500,10 +500,8 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supply raise TypeError('store_models must be True or False. ' f'Got {str(store_models)}.') - if supply_predictions is not None: - if not isinstance(supply_predictions, dict): - raise TypeError('The predictions must be a dictionary. 
' - f'{str(supply_predictions)} of type {str(type(supply_predictions))} was passed.') + # check prediction format + self._check_supplied_predictions(supplied_predictions) # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() @@ -1003,6 +1001,51 @@ def _check_learner(learner, learner_name, regressor, classifier): return learner_is_classifier + def _check_supplied_predictions(self, supplied_predictions): + if supplied_predictions is not None: + if not isinstance(supplied_predictions, dict): + raise TypeError('The predictions must be a dictionary. ' + f'{str(supplied_predictions)} of type {str(type(supplied_predictions))} was passed.') + + if self.n_rep > 1: + raise NotImplementedError('supplied_predictions is not yet implmented for ``n_rep > 1``.') + + supplied_treatments = list(supplied_predictions.keys()) + valid_treatments = self._dml_data.d_cols + if not set(supplied_treatments).issubset(valid_treatments): + raise ValueError('Invalid supplied_predictions. ' + f'Invalid treatment variable in {str(supplied_treatments)}. ' + 'Valid treatment variables ' + ' or '.join(valid_treatments) + '.') + + for treatment in supplied_treatments: + if not isinstance(supplied_predictions[treatment], dict): + raise TypeError('supplied_predictions must be a nested dictionary. ' + f'For treatment {str(treatment)} a value of type ' + f'{str(type(supplied_predictions[treatment]))} was passed.') + + supplied_learners = list(supplied_predictions[treatment].keys()) + valid_learners = self.params_names + if not set(supplied_learners).issubset(valid_learners): + raise ValueError('Invalid supplied_predictions. ' + f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. ' + 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') + + for learner in supplied_learners: + if not isinstance(supplied_predictions[treatment][learner], np.ndarray): + raise TypeError('Invalid supplied_predictions. 
' + 'The values of the nested list must be a numpy array. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. ' + + f'Object of type {str(type(supplied_predictions[treatment][learner]))} was passed.') + + expected_shape = (self._dml_data.n_obs, ) + if supplied_predictions[treatment][learner].shape != expected_shape: + raise ValueError('Invalid supplied_predictions. ' + f'The supplied predictions have to be of shape {str(expected_shape)}. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. ' + + f'Predictions of shape {str(supplied_predictions[treatment][learner].shape)} passed.') + def _initialize_arrays(self): psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) psi_deriv = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index 10f38c24..899f9f41 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -977,3 +977,81 @@ def eval_fct(y_pred, y_true): return np.nan with pytest.raises(ValueError, match=msg): dml_irm_obj.evaluate_learners(metric=eval_fct) + + +@pytest.mark.ci +def test_double_ml_supply_predictions(): + dml_irm_obj = DoubleMLIRM(dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + trimming_threshold=0.05, + n_folds=5, + score='ATE', + n_rep=2) + + msg = "The predictions must be a dictionary. ml_m of type was passed." + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(supplied_predictions="ml_m") + + predictions = {'ml_f': 'test'} + msg = "supplied_predictions is not yet implmented for ``n_rep > 1``." 
+ with pytest.raises(NotImplementedError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + dml_irm_obj = DoubleMLIRM(dml_data_irm, + ml_g=Lasso(), + ml_m=LogisticRegression(), + trimming_threshold=0.05, + n_folds=5, + score='ATE', + n_rep=1) + + predictions = {'d': 'test', 'd_f': 'test'} + msg = (r"Invalid supplied_predictions. Invalid treatment variable in \['d', 'd_f'\]. " + "Valid treatment variables d.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': 'test'} + msg = ("supplied_predictions must be a nested dictionary. " + "For treatment d a value of type was passed.") + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_f': 'test'}} + msg = ("Invalid supplied_predictions. " + r"Invalid nuisance learner for treatment d in \['ml_f'\]. " + "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': 'test', 'ml_f': 'test'}} + msg = ("Invalid supplied_predictions. " + r"Invalid nuisance learner for treatment d in \['ml_m', 'ml_f'\]. " + "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': 'test'}} + msg = ("Invalid supplied_predictions. " + "The values of the nested list must be a numpy array. " + "Invalid predictions for treatment d and learner ml_m. " + "Object of type was passed.") + with pytest.raises(TypeError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': np.array([0])}} + msg = ('Invalid supplied_predictions. ' + r'The supplied predictions have to be of shape \(100,\). ' + 'Invalid predictions for treatment d and learner ml_m. 
' + r'Predictions of shape \(1,\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) + + predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}} + msg = ('Invalid supplied_predictions. ' + r'The supplied predictions have to be of shape \(100,\). ' + 'Invalid predictions for treatment d and learner ml_m. ' + r'Predictions of shape \(5, 3\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(supplied_predictions=predictions) From 1856aab95ef094fd692774c4f09a03cc12255fa9 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 13:23:49 +0200 Subject: [PATCH 004/134] refactor to external_predictions --- doubleml/double_ml.py | 60 ++++++++++++---------- doubleml/tests/test_doubleml_exceptions.py | 38 +++++++------- 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 4422cef6..c8b6e2f3 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -458,7 +458,7 @@ def __psi_deriv(self): def __all_se(self): return self._all_se[self._i_treat, self._i_rep] - def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, supplied_predictions=None): + def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, store_models=False): """ Estimate DoubleML models. @@ -477,9 +477,11 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, suppli to analyze the fitted models or extract information like variable importance. Default is ``False``. - supplied_predictions : None or dict + external_predictions : None or dict If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions - for a specific learner is supplied, the model will use the supplied nuisance predictions instead. + for a specific learner is supplied, the model will use the supplied nuisance predictions instead. 
Has to + be a nested dictionary where the keys refer to the treatment and the keys of the nested dictionarys refer to the + corresponding learners. Default is `None`. Returns @@ -501,7 +503,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, suppli f'Got {str(store_models)}.') # check prediction format - self._check_supplied_predictions(supplied_predictions) + self._check_external_predictions(external_predictions) # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() @@ -516,13 +518,19 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False, suppli self._i_rep = i_rep for i_d in range(self._dml_data.n_treat): self._i_treat = i_d - + # this step could be skipped for the single treatment variable case if self._dml_data.n_treat > 1: self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) + # set the supplied predictions for the treatment and each learner (including None) + prediction_dict = {} + for learner in self.params_names: + prediction_dict[learner] = None + # ml estimation of nuisance models and computation of score elements - score_elements, preds = self._nuisance_est(self.__smpls, n_jobs_cv, return_models=store_models) + score_elements, preds = self._nuisance_est(self.__smpls, n_jobs_cv, + return_models=store_models) self._set_score_elements(score_elements, self._i_rep, self._i_treat) @@ -949,7 +957,7 @@ def _initialize_ml_nuisance_params(self): pass @abstractmethod - def _nuisance_est(self, smpls, n_jobs_cv, return_models): + def _nuisance_est(self, smpls, n_jobs_cv, return_models, external_predictions): pass @abstractmethod @@ -1001,50 +1009,50 @@ def _check_learner(learner, learner_name, regressor, classifier): return learner_is_classifier - def _check_supplied_predictions(self, supplied_predictions): - if supplied_predictions is not None: - if not isinstance(supplied_predictions, dict): - raise TypeError('The predictions must be a dictionary. 
' - f'{str(supplied_predictions)} of type {str(type(supplied_predictions))} was passed.') + def _check_external_predictions(self, external_predictions): + if external_predictions is not None: + if not isinstance(external_predictions, dict): + raise TypeError('external_predictions must be a dictionary. ' + f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') if self.n_rep > 1: - raise NotImplementedError('supplied_predictions is not yet implmented for ``n_rep > 1``.') + raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') - supplied_treatments = list(supplied_predictions.keys()) + supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols if not set(supplied_treatments).issubset(valid_treatments): - raise ValueError('Invalid supplied_predictions. ' + raise ValueError('Invalid external_predictions. ' f'Invalid treatment variable in {str(supplied_treatments)}. ' 'Valid treatment variables ' + ' or '.join(valid_treatments) + '.') for treatment in supplied_treatments: - if not isinstance(supplied_predictions[treatment], dict): - raise TypeError('supplied_predictions must be a nested dictionary. ' + if not isinstance(external_predictions[treatment], dict): + raise TypeError('external_predictions must be a nested dictionary. ' f'For treatment {str(treatment)} a value of type ' - f'{str(type(supplied_predictions[treatment]))} was passed.') + f'{str(type(external_predictions[treatment]))} was passed.') - supplied_learners = list(supplied_predictions[treatment].keys()) + supplied_learners = list(external_predictions[treatment].keys()) valid_learners = self.params_names if not set(supplied_learners).issubset(valid_learners): - raise ValueError('Invalid supplied_predictions. ' + raise ValueError('Invalid external_predictions. ' f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. 
' 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') for learner in supplied_learners: - if not isinstance(supplied_predictions[treatment][learner], np.ndarray): - raise TypeError('Invalid supplied_predictions. ' + if not isinstance(external_predictions[treatment][learner], np.ndarray): + raise TypeError('Invalid external_predictions. ' 'The values of the nested list must be a numpy array. ' 'Invalid predictions for treatment ' + str(treatment) + ' and learner ' + str(learner) + '. ' + - f'Object of type {str(type(supplied_predictions[treatment][learner]))} was passed.') + f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') expected_shape = (self._dml_data.n_obs, ) - if supplied_predictions[treatment][learner].shape != expected_shape: - raise ValueError('Invalid supplied_predictions. ' + if external_predictions[treatment][learner].shape != expected_shape: + raise ValueError('Invalid external_predictions. ' f'The supplied predictions have to be of shape {str(expected_shape)}. ' 'Invalid predictions for treatment ' + str(treatment) + ' and learner ' + str(learner) + '. 
' + - f'Predictions of shape {str(supplied_predictions[treatment][learner].shape)} passed.') + f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') def _initialize_arrays(self): psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index 899f9f41..aadd7a62 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -980,7 +980,7 @@ def eval_fct(y_pred, y_true): @pytest.mark.ci -def test_double_ml_supply_predictions(): +def test_double_ml_external_predictions(): dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), @@ -989,14 +989,14 @@ def test_double_ml_supply_predictions(): score='ATE', n_rep=2) - msg = "The predictions must be a dictionary. ml_m of type was passed." + msg = "external_predictions must be a dictionary. ml_m of type was passed." with pytest.raises(TypeError, match=msg): - dml_irm_obj.fit(supplied_predictions="ml_m") + dml_irm_obj.fit(external_predictions="ml_m") predictions = {'ml_f': 'test'} - msg = "supplied_predictions is not yet implmented for ``n_rep > 1``." + msg = "external_predictions is not yet implmented for ``n_rep > 1``." with pytest.raises(NotImplementedError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), @@ -1007,51 +1007,51 @@ def test_double_ml_supply_predictions(): n_rep=1) predictions = {'d': 'test', 'd_f': 'test'} - msg = (r"Invalid supplied_predictions. Invalid treatment variable in \['d', 'd_f'\]. " + msg = (r"Invalid external_predictions. Invalid treatment variable in \['d', 'd_f'\]. 
" "Valid treatment variables d.") with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': 'test'} - msg = ("supplied_predictions must be a nested dictionary. " + msg = ("external_predictions must be a nested dictionary. " "For treatment d a value of type was passed.") with pytest.raises(TypeError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_f': 'test'}} - msg = ("Invalid supplied_predictions. " + msg = ("Invalid external_predictions. " r"Invalid nuisance learner for treatment d in \['ml_f'\]. " "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': 'test', 'ml_f': 'test'}} - msg = ("Invalid supplied_predictions. " + msg = ("Invalid external_predictions. " r"Invalid nuisance learner for treatment d in \['ml_m', 'ml_f'\]. " "Valid nuisance learners ml_g0 or ml_g1 or ml_m.") with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': 'test'}} - msg = ("Invalid supplied_predictions. " + msg = ("Invalid external_predictions. " "The values of the nested list must be a numpy array. " "Invalid predictions for treatment d and learner ml_m. " "Object of type was passed.") with pytest.raises(TypeError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': np.array([0])}} - msg = ('Invalid supplied_predictions. ' + msg = ('Invalid external_predictions. ' r'The supplied predictions have to be of shape \(100,\). ' 'Invalid predictions for treatment d and learner ml_m. 
' r'Predictions of shape \(1,\) passed.') with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}} - msg = ('Invalid supplied_predictions. ' + msg = ('Invalid external_predictions. ' r'The supplied predictions have to be of shape \(100,\). ' 'Invalid predictions for treatment d and learner ml_m. ' r'Predictions of shape \(5, 3\) passed.') with pytest.raises(ValueError, match=msg): - dml_irm_obj.fit(supplied_predictions=predictions) + dml_irm_obj.fit(external_predictions=predictions) From 189ac0e6cea43ea35059fbb287035eaffcfc2cfd Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 16:19:15 +0200 Subject: [PATCH 005/134] extend IRM with external predictions --- doubleml/double_ml.py | 10 ++- doubleml/double_ml_irm.py | 102 ++++++++++++++++------------ doubleml/tests/_utils_irm_manual.py | 30 ++++---- doubleml/tests/test_irm.py | 42 +++++++++++- 4 files changed, 123 insertions(+), 61 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index c8b6e2f3..22799ecb 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -524,12 +524,18 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) # set the supplied predictions for the treatment and each learner (including None) - prediction_dict = {} + ext_prediction_dict = {} for learner in self.params_names: - prediction_dict[learner] = None + if external_predictions is None: + ext_prediction_dict[learner] = None + elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] + else: + ext_prediction_dict[learner] = None # ml estimation of nuisance models and computation of score elements score_elements, preds = 
self._nuisance_est(self.__smpls, n_jobs_cv, + external_predictions=ext_prediction_dict, return_models=store_models) self._set_score_elements(score_elements, self._i_rep, self._i_treat) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 063d2d6b..41a6b898 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -1,6 +1,7 @@ import numpy as np import pandas as pd import warnings +import copy from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target @@ -199,7 +200,7 @@ def _check_data(self, obj_dml_data): 'needs to be specified as treatment variable.') return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, @@ -208,44 +209,60 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) # nuisance g - g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) - - if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat0['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) - if binary_preds & zero_one_preds: - raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' - 'probabilities and not labels are predicted.') - - g_hat1 = {'preds': None, 'targets': None, 'models': None} - if (self.score == 'ATE') | callable(self.score): - g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) - g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) + if external_predictions['ml_g0'] is None: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) + g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) if self._dml_data.binary_outcome: - binary_preds = (type_of_target(g_hat1['preds']) == 'binary') - zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) + binary_preds = (type_of_target(g_hat0['preds']) == 'binary') + zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) if binary_preds & zero_one_preds: raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + else: + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + + g_hat1 = {'preds': None, 'targets': None, 'models': None} + if (self.score == 'ATE') | callable(self.score): + if external_predictions['ml_g1'] is None: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) + g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) + + if self._dml_data.binary_outcome: + binary_preds = (type_of_target(g_hat1['preds']) == 'binary') + zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) + if binary_preds & zero_one_preds: + raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') + else: + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) - _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + if external_predictions['ml_m'] is None: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) + else: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} psi_a, psi_b = self._score_elements(y, d, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], @@ -273,14 +290,13 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): for _, test_index in smpls: p_hat[test_index] = np.mean(d[test_index]) - m_hat = _trimm(m_hat, self.trimming_rule, self.trimming_threshold) - + m_hat_adj = copy.deepcopy(m_hat) if self.normalize_ipw: if self.dml_procedure == 'dml1': for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) # compute residuals u_hat0 = y - g_hat0 @@ -291,19 +307,19 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): if isinstance(self.score, str): if self.score == 'ATE': psi_b = 
g_hat1 - g_hat0 \ - + np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat) - psi_a = np.full_like(m_hat, -1.0) + + np.divide(np.multiply(d, u_hat1), m_hat_adj) \ + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat_adj) + psi_a = np.full_like(m_hat_adj, -1.0) else: assert self.score == 'ATTE' psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \ - - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)), - np.multiply(p_hat, (1.0 - m_hat))) + - np.divide(np.multiply(m_hat_adj, np.multiply(1.0-d, u_hat0)), + np.multiply(p_hat, (1.0 - m_hat_adj))) psi_a = - np.divide(d, p_hat) else: assert callable(self.score) psi_a, psi_b = self.score(y=y, d=d, - g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat, + g_hat0=g_hat0, g_hat1=g_hat1, m_hat=m_hat_adj, smpls=smpls) return psi_a, psi_b diff --git a/doubleml/tests/_utils_irm_manual.py b/doubleml/tests/_utils_irm_manual.py index 5328630b..f0299515 100644 --- a/doubleml/tests/_utils_irm_manual.py +++ b/doubleml/tests/_utils_irm_manual.py @@ -1,4 +1,5 @@ import numpy as np +import copy from sklearn.base import clone, is_classifier from ._utils_boot import boot_manual, draw_weights @@ -139,20 +140,21 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) for idx, (_, test_index) in enumerate(smpls): thetas[idx] = irm_orth(g_hat0[test_index], g_hat1[test_index], - m_hat[test_index], p_hat[test_index], + m_hat_adj[test_index], p_hat[test_index], u_hat0[test_index], u_hat1[test_index], d[test_index], score) theta_hat = np.mean(thetas) if len(smpls) > 1: se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1, - m_hat, p_hat, 
+ m_hat_adj, p_hat, u_hat0, u_hat1, d, score, n_obs)) else: @@ -160,7 +162,7 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s test_index = smpls[0][1] n_obs = len(test_index) se = np.sqrt(var_irm(theta_hat, g_hat0[test_index], g_hat1[test_index], - m_hat[test_index], p_hat[test_index], + m_hat_adj[test_index], p_hat[test_index], u_hat0[test_index], u_hat1[test_index], d[test_index], score, n_obs)) @@ -172,13 +174,14 @@ def irm_dml2(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) - theta_hat = irm_orth(g_hat0, g_hat1, m_hat, p_hat, + theta_hat = irm_orth(g_hat0, g_hat1, m_hat_adj, p_hat, u_hat0, u_hat1, d, score) se = np.sqrt(var_irm(theta_hat, g_hat0, g_hat1, - m_hat, p_hat, + m_hat_adj, p_hat, u_hat0, u_hat1, d, score, n_obs)) @@ -249,12 +252,13 @@ def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_h u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) + m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: if dml_procedure == 'dml1': for _, test_index in smpls: - m_hat[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: - m_hat = _normalize_ipw(m_hat, d) + m_hat_adj = _normalize_ipw(m_hat, d) if apply_cross_fitting: if score == 'ATE': @@ -272,13 +276,13 @@ def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_h if score == 'ATE': psi = g_hat1 - g_hat0 \ - + np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.-d, u_hat0), 1.-m_hat) - theta + + np.divide(np.multiply(d, u_hat1), m_hat_adj) \ + - np.divide(np.multiply(1.-d, 
u_hat0), 1.-m_hat_adj) - theta else: assert score == 'ATTE' psi = np.divide(np.multiply(d, u_hat0), p_hat) \ - - np.divide(np.multiply(m_hat, np.multiply(1.-d, u_hat0)), - np.multiply(p_hat, (1.-m_hat))) \ + - np.divide(np.multiply(m_hat_adj, np.multiply(1.-d, u_hat0)), + np.multiply(p_hat, (1.-m_hat_adj))) \ - theta * np.divide(d, p_hat) boot_theta, boot_t_stat = boot_manual(psi, J, smpls, se, weights, n_rep_boot, apply_cross_fitting) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index d28e628e..4592e167 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -18,8 +18,8 @@ @pytest.fixture(scope='module', params=[[LinearRegression(), LogisticRegression(solver='lbfgs', max_iter=250)], - [RandomForestRegressor(max_depth=5, n_estimators=10), - RandomForestClassifier(max_depth=5, n_estimators=10)]]) + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) def learner(request): return request.param @@ -87,10 +87,31 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ normalize_ipw=normalize_ipw, trimming_threshold=trimming_threshold) + np.random.seed(3141) + # test with external nuisance predictions + dml_irm_obj_ext = dml.DoubleMLIRM(obj_dml_data, + ml_g, ml_m, + n_folds, + score=score, + dml_procedure=dml_procedure, + normalize_ipw=normalize_ipw, + draw_sample_splitting=False, + trimming_threshold=trimming_threshold) + + # synchronize the sample splitting + dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1), + 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1), + 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1)}} + dml_irm_obj_ext.fit(external_predictions=prediction_dict) + res_dict = {'coef': dml_irm_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_irm_obj_ext.coef, 'se': dml_irm_obj.se, 
'se_manual': res_manual['se'], + 'se_ext': dml_irm_obj_ext.se, 'boot_methods': boot_methods} for bootstrap in boot_methods: @@ -104,10 +125,14 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ np.random.seed(3141) dml_irm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_irm_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) res_dict['boot_coef' + bootstrap] = dml_irm_obj.boot_coef res_dict['boot_t_stat' + bootstrap] = dml_irm_obj.boot_t_stat res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat + res_dict['boot_coef' + bootstrap + '_ext'] = dml_irm_obj_ext.boot_coef + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_irm_obj_ext.boot_t_stat return res_dict @@ -117,6 +142,9 @@ def test_dml_irm_coef(dml_irm_fixture): assert math.isclose(dml_irm_fixture['coef'], dml_irm_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_irm_fixture['coef'], + dml_irm_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -124,7 +152,9 @@ def test_dml_irm_se(dml_irm_fixture): assert math.isclose(dml_irm_fixture['se'], dml_irm_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) - + assert math.isclose(dml_irm_fixture['se'], + dml_irm_fixture['se_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci def test_dml_irm_boot(dml_irm_fixture): @@ -132,9 +162,15 @@ def test_dml_irm_boot(dml_irm_fixture): assert np.allclose(dml_irm_fixture['boot_coef' + bootstrap], dml_irm_fixture['boot_coef' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_irm_fixture['boot_coef' + bootstrap], + dml_irm_fixture['boot_coef' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap], dml_irm_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_irm_fixture['boot_t_stat' + bootstrap], + dml_irm_fixture['boot_t_stat' + 
bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) @pytest.mark.ci From d280a78765570254f6d9294e0490c67dcdf0cda0 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 26 Apr 2023 16:57:21 +0200 Subject: [PATCH 006/134] refactor IRM nuisance est and fix unit tests --- doubleml/double_ml_cvar.py | 2 +- doubleml/double_ml_iivm.py | 2 +- doubleml/double_ml_irm.py | 33 +++++++++++--------- doubleml/double_ml_lpq.py | 2 +- doubleml/double_ml_pliv.py | 2 +- doubleml/double_ml_plr.py | 2 +- doubleml/double_ml_pq.py | 2 +- doubleml/tests/test_nonlinear_score_mixin.py | 2 +- 8 files changed, 25 insertions(+), 22 deletions(-) diff --git a/doubleml/double_ml_cvar.py b/doubleml/double_ml_cvar.py index 31f446ea..bc9d6af6 100644 --- a/doubleml/double_ml_cvar.py +++ b/doubleml/double_ml_cvar.py @@ -206,7 +206,7 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ['ml_g', 'ml_m']} - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 4a8d727a..b710105b 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -241,7 +241,7 @@ def _check_data(self, obj_dml_data): raise ValueError(err_msg) return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, z = check_X_y(x, np.ravel(self._dml_data.z), diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 41a6b898..96912d5b 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -209,7 
+209,12 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) # nuisance g - if external_predictions['ml_g0'] is None: + if external_predictions['ml_g0'] is not None: + # use external predictions + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], return_models=return_models) @@ -224,14 +229,15 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' 'observed to be binary with values 0 and 1. Make sure that for classifiers ' 'probabilities and not labels are predicted.') - else: - g_hat0 = {'preds': external_predictions['ml_g0'], - 'targets': None, - 'models': None} g_hat1 = {'preds': None, 'targets': None, 'models': None} if (self.score == 'ATE') | callable(self.score): - if external_predictions['ml_g1'] is None: + if external_predictions['ml_g1'] is not None: + # use external predictions + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], return_models=return_models) @@ -246,23 +252,20 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' 'probabilities and not labels are predicted.') - else: - g_hat1 = {'preds': external_predictions['ml_g1'], - 'targets': None, - 'models': None} # nuisance m - if external_predictions['ml_m'] is None: + if external_predictions['ml_m'] is not None: + # use external predictions + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], return_models=return_models) _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) - else: - m_hat = {'preds': external_predictions['ml_m'], - 'targets': None, - 'models': None} psi_a, psi_b = self._score_elements(y, d, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 97aebb23..72663c06 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -276,7 +276,7 @@ def _initialize_ml_nuisance_params(self): for learner in ['ml_m_z', 'ml_g_du_z0', 'ml_g_du_z1', 'ml_m_d_z0', 'ml_m_d_z1']} - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index 88369250..1ad57d24 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -292,7 +292,7 @@ def set_ml_nuisance_params(self, learner, treat_var, params): learner = 'ml_l' super(DoubleMLPLIV, self).set_ml_nuisance_params(learner, treat_var, params) - def _nuisance_est(self, smpls, n_jobs_cv, 
return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): if self.partialX & (not self.partialZ): psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, return_models) elif (not self.partialX) & self.partialZ: diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 4b7eb401..76bf603d 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -185,7 +185,7 @@ def set_ml_nuisance_params(self, learner, treat_var, params): learner = 'ml_l' super(DoubleMLPLR, self).set_ml_nuisance_params(learner, treat_var, params) - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index e59e2dda..70e79c00 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -254,7 +254,7 @@ def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ['ml_g', 'ml_m']} - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py index 92210085..77d8dfe6 100644 --- a/doubleml/tests/test_nonlinear_score_mixin.py +++ b/doubleml/tests/test_nonlinear_score_mixin.py @@ -99,7 +99,7 @@ def _check_score(self, score): def _check_data(self, obj_dml_data): pass - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = 
check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, From 437f85b3692b9870c4ff12b0b0ebd10c56eb9da6 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 8 May 2023 10:51:45 +0200 Subject: [PATCH 007/134] fix format --- doubleml/double_ml.py | 4 ++-- doubleml/double_ml_irm.py | 20 ++++++++++---------- doubleml/tests/test_irm.py | 3 ++- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 22799ecb..e276df81 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -480,7 +480,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, external_predictions : None or dict If `None` all models for the learners are fitted and evaluated. If a dictionary containing predictions for a specific learner is supplied, the model will use the supplied nuisance predictions instead. Has to - be a nested dictionary where the keys refer to the treatment and the keys of the nested dictionarys refer to the + be a nested dictionary where the keys refer to the treatment and the keys of the nested dictionarys refer to the corresponding learners. Default is `None`. 
@@ -518,7 +518,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, self._i_rep = i_rep for i_d in range(self._dml_data.n_treat): self._i_treat = i_d - + # this step could be skipped for the single treatment variable case if self._dml_data.n_treat > 1: self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 9f77de62..b52ece60 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -220,8 +220,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) g_hat0['targets'] = _cond_targets(g_hat0['targets'], cond_sample=(d == 0)) @@ -230,9 +230,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa zero_one_preds = np.all((np.power(g_hat0['preds'], 2) - g_hat0['preds']) == 0) if binary_preds & zero_one_preds: raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') g_hat1 = {'preds': None, 'targets': None, 'models': None} if (self.score == 'ATE') | callable(self.score): @@ -243,8 +243,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) @@ -253,9 +253,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) if binary_preds & zero_one_preds: raise ValueError(f'For the binary outcome variable {self._dml_data.y_col}, ' - f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' - 'observed to be binary with values 0 and 1. Make sure that for classifiers ' - 'probabilities and not labels are predicted.') + f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' + 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' + 'probabilities and not labels are predicted.') # nuisance m if external_predictions['ml_m'] is not None: diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 4592e167..992eb28f 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -100,7 +100,7 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ # synchronize the sample splitting dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls) - + prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1), 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1), 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1)}} @@ -156,6 +156,7 @@ def test_dml_irm_se(dml_irm_fixture): dml_irm_fixture['se_ext'], rel_tol=1e-9, abs_tol=1e-4) + @pytest.mark.ci def test_dml_irm_boot(dml_irm_fixture): for bootstrap in dml_irm_fixture['boot_methods']: From 7287d243d438c3ca122a3faf25932f0b0a4fa33d Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 8 May 2023 11:15:47 +0200 Subject: [PATCH 008/134] update nuisance_est input DID and DIDCS --- doubleml/double_ml_did.py | 2 +- doubleml/double_ml_did_cs.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 199b08d7..41317545 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -192,7 +192,7 @@ def _check_data(self, obj_dml_data): 'needs to be specified as treatment variable.') return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py index 2be66b41..fdde41ff 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py 
@@ -205,7 +205,7 @@ def _check_data(self, obj_dml_data): return - def _nuisance_est(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, From 4530983836ae2edf3a8381490cf9271358428acc Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 12 Jun 2023 17:18:52 +0200 Subject: [PATCH 009/134] Adjusted DoubleMLPLR class for external prediction --- doubleml/double_ml_plr.py | 39 +++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 5704aec7..aa5b24df 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -181,16 +181,26 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) # nuisance l - l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], - return_models=return_models) - _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) + if external_predictions['ml_l'] is not None: + l_hat = {'preds': external_predictions['ml_l'], + 'targets': None, + 'models': None} + else: + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], + return_models=return_models) + _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + if external_predictions['ml_m'] is not None: + 
m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) if self._check_learner(self._learner['ml_m'], 'ml_m', regressor=True, classifier=True): _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) @@ -211,10 +221,15 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) + if external_predictions['ml_g'] is not None: + g_hat = {'preds': external_predictions['ml_g'], + 'targets': None, + 'models': None} + else: + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) psi_a, psi_b = self._score_elements(y, d, l_hat['preds'], m_hat['preds'], g_hat['preds'], smpls) psi_elements = {'psi_a': psi_a, From e369026ad5a04d5635c3b2977c4762f41201896b Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 13 Jun 2023 08:40:18 +0200 Subject: [PATCH 010/134] External predictions added to test cases for PLR --- doubleml/tests/test_plr.py | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/doubleml/tests/test_plr.py 
b/doubleml/tests/test_plr.py index 7b8b56d9..17eacfe2 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -76,11 +76,44 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), all_smpls, dml_procedure, score) + + np.random.seed(3141) + # test with external nuisance predictions + if score == 'partialling out': + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds, + score=score, + dml_procedure=dml_procedure) + else: + assert score == 'IV-type' + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + score=score, + dml_procedure=dml_procedure) + + # synchronize the sample splitting + dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + if score == 'partialling out': + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1)}} + else: + assert score == 'IV-type' + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1)}} + + dml_plr_obj_ext.fit(external_predictions=prediction_dict) + res_dict = {'coef': dml_plr_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_plr_obj_ext.coef, 'se': dml_plr_obj.se, 'se_manual': res_manual['se'], + 'se_ext': dml_plr_obj_ext.se, 'boot_methods': boot_methods} for bootstrap in boot_methods: @@ -91,10 +124,14 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): np.random.seed(3141) dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) + np.random.seed(3141) + dml_plr_obj_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot) res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta res_dict['boot_t_stat' + 
bootstrap + '_manual'] = boot_t_stat + res_dict['boot_coef' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_coef + res_dict['boot_t_stat' + bootstrap + '_ext'] = dml_plr_obj_ext.boot_t_stat return res_dict @@ -104,6 +141,9 @@ def test_dml_plr_coef(dml_plr_fixture): assert math.isclose(dml_plr_fixture['coef'], dml_plr_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['coef'], + dml_plr_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -111,6 +151,9 @@ def test_dml_plr_se(dml_plr_fixture): assert math.isclose(dml_plr_fixture['se'], dml_plr_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_plr_fixture['se'], + dml_plr_fixture['se_ext'], + rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci @@ -119,9 +162,15 @@ def test_dml_plr_boot(dml_plr_fixture): assert np.allclose(dml_plr_fixture['boot_coef' + bootstrap], dml_plr_fixture['boot_coef' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['boot_coef' + bootstrap], + dml_plr_fixture['boot_coef' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], dml_plr_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + assert np.allclose(dml_plr_fixture['boot_t_stat' + bootstrap], + dml_plr_fixture['boot_t_stat' + bootstrap + '_ext'], + rtol=1e-9, atol=1e-4) @pytest.fixture(scope="module") From fe3d862e2b2e873555b93eea9c8432d3fcc7aa25 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 13 Jun 2023 13:01:49 +0200 Subject: [PATCH 011/134] Excluding testfile from staging --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 41a409eb..7dae397b 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ share/python-wheels/ MANIFEST *.idea *.vscode +test_plr_basic.py From f1e112766bea04d05c55592a11d3d253174ed81d Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge 
<68404051+JanTeichertKluge@users.noreply.github.com> Date: Tue, 13 Jun 2023 13:41:01 +0200 Subject: [PATCH 012/134] Update .gitignore removing testfile --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 7dae397b..41a409eb 100644 --- a/.gitignore +++ b/.gitignore @@ -29,4 +29,3 @@ share/python-wheels/ MANIFEST *.idea *.vscode -test_plr_basic.py From e4573441e216c17e50b27e98c2856fca465d1c1e Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 09:21:55 +0200 Subject: [PATCH 013/134] minor change according to n_rep > 1 --- doubleml/double_ml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index e276df81..4fecfe80 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1021,8 +1021,8 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. ' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - if self.n_rep > 1: - raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') + # if self.n_rep > 1: + # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols From 84aa99ff0f1916dcdc198b988203489a0b1261f9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 09:39:46 +0200 Subject: [PATCH 014/134] . --- doubleml/double_ml.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 4fecfe80..e276df81 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1021,8 +1021,8 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. 
' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - # if self.n_rep > 1: - # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') + if self.n_rep > 1: + raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols From 5cfc73cd3ea07cc53576dde91d750471b0002483 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 10:33:22 +0200 Subject: [PATCH 015/134] n_rep > 1 are now supported by double_ml.py --- doubleml/double_ml.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index e276df81..05b6059c 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -529,7 +529,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] else: ext_prediction_dict[learner] = None @@ -1021,8 +1021,8 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. 
' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - if self.n_rep > 1: - raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') + # if self.n_rep > 1: + # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols @@ -1052,7 +1052,7 @@ def _check_external_predictions(self, external_predictions): ' and learner ' + str(learner) + '. ' + f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') - expected_shape = (self._dml_data.n_obs, ) + expected_shape = (self._dml_data.n_obs, self.n_rep) if external_predictions[treatment][learner].shape != expected_shape: raise ValueError('Invalid external_predictions. ' f'The supplied predictions have to be of shape {str(expected_shape)}. ' From 78b0bba144f4c076fce8810b3ba7f0d493eb26b9 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 13:04:02 +0200 Subject: [PATCH 016/134] Update double_ml.py --- doubleml/double_ml.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 05b6059c..d0d25ee3 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1021,9 +1021,6 @@ def _check_external_predictions(self, external_predictions): raise TypeError('external_predictions must be a dictionary. 
' f'{str(external_predictions)} of type {str(type(external_predictions))} was passed.') - # if self.n_rep > 1: - # raise NotImplementedError('external_predictions is not yet implmented for ``n_rep > 1``.') - supplied_treatments = list(external_predictions.keys()) valid_treatments = self._dml_data.d_cols if not set(supplied_treatments).issubset(valid_treatments): From cd16290d8e3722c7640cdfcefc1fa334e2c23f90 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 14:11:48 +0200 Subject: [PATCH 017/134] Addition / adaptation of the test files --- doubleml/tests/test_irm.py | 6 ++--- doubleml/tests/test_plr.py | 10 ++++---- doubleml/tests/test_plr_rep_cross.py | 37 ++++++++++++++++++++++++++-- 3 files changed, 43 insertions(+), 10 deletions(-) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 992eb28f..24a5c240 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -101,9 +101,9 @@ def dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ # synchronize the sample splitting dml_irm_obj_ext.set_sample_splitting(all_smpls=all_smpls) - prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1), - 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1), - 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1)}} + prediction_dict = {'d': {'ml_g0': dml_irm_obj.predictions['ml_g0'].reshape(-1, 1), + 'ml_g1': dml_irm_obj.predictions['ml_g1'].reshape(-1, 1), + 'ml_m': dml_irm_obj.predictions['ml_m'].reshape(-1, 1)}} dml_irm_obj_ext.fit(external_predictions=prediction_dict) res_dict = {'coef': dml_irm_obj.coef, diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index 17eacfe2..7177e872 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -97,13 +97,13 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) if score == 'partialling out': - prediction_dict = {'d': 
{'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), - 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1)}} + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1)}} else: assert score == 'IV-type' - prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1), - 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1), - 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1)}} + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}} dml_plr_obj_ext.fit(external_predictions=prediction_dict) diff --git a/doubleml/tests/test_plr_rep_cross.py b/doubleml/tests/test_plr_rep_cross.py index f2a50e21..4f95f10a 100644 --- a/doubleml/tests/test_plr_rep_cross.py +++ b/doubleml/tests/test_plr_rep_cross.py @@ -74,13 +74,46 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep): res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner), all_smpls, dml_procedure, score, n_rep) + + np.random.seed(3141) + # test with external nuisance predictions + if score == 'partialling out': + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, + n_folds, + n_rep, + score=score, + dml_procedure=dml_procedure) + else: + assert score == 'IV-type' + dml_plr_obj_ext = dml.DoubleMLPLR(obj_dml_data, + ml_l, ml_m, ml_g, + n_folds, + n_rep, + score=score, + dml_procedure=dml_procedure) + + # synchronize the sample splitting + dml_plr_obj_ext.set_sample_splitting(all_smpls=all_smpls) + + if score == 'partialling out': + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), + 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep)}} + else: + assert score == 'IV-type' + prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), + 'ml_m': 
dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep), + 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, n_rep)}} + + dml_plr_obj_ext.fit(external_predictions=prediction_dict) res_dict = {'coef': dml_plr_obj.coef, 'coef_manual': res_manual['theta'], + 'coef_ext': dml_plr_obj_ext.coef, 'se': dml_plr_obj.se, 'se_manual': res_manual['se'], - 'boot_methods': boot_methods - } + 'se_ext': dml_plr_obj_ext.se, + 'boot_methods': boot_methods} for bootstrap in boot_methods: np.random.seed(3141) From 75dfd1e7d94207419d92d4e16131b17c196ecfcb Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 14 Jun 2023 14:50:08 +0200 Subject: [PATCH 018/134] Changes double_ml to pass partly ext. predictions --- doubleml/double_ml.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index d0d25ee3..8402080c 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -529,7 +529,10 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] + if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray): + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] + else: + ext_prediction_dict[learner] = None else: ext_prediction_dict[learner] = None @@ -1041,13 +1044,13 @@ def _check_external_predictions(self, external_predictions): f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. ' 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') - for learner in supplied_learners: - if not isinstance(external_predictions[treatment][learner], np.ndarray): - raise TypeError('Invalid external_predictions. 
' - 'The values of the nested list must be a numpy array. ' - 'Invalid predictions for treatment ' + str(treatment) + - ' and learner ' + str(learner) + '. ' + - f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') + # for learner in supplied_learners: + # if not isinstance(external_predictions[treatment][learner], np.ndarray): + # raise TypeError('Invalid external_predictions. ' + # 'The values of the nested list must be a numpy array. ' + # 'Invalid predictions for treatment ' + str(treatment) + + # ' and learner ' + str(learner) + '. ' + + # f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') expected_shape = (self._dml_data.n_obs, self.n_rep) if external_predictions[treatment][learner].shape != expected_shape: From 87955c1bad126e180cbdf31f7b58c7558e967b34 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 15 Jun 2023 16:44:29 +0200 Subject: [PATCH 019/134] new testfile for ext_preds --- doubleml/tests/test_external_predictions.py | 80 +++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 doubleml/tests/test_external_predictions.py diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py new file mode 100644 index 00000000..43a0cba6 --- /dev/null +++ b/doubleml/tests/test_external_predictions.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV +from doubleml import DoubleMLPLR, DoubleMLData +from doubleml.datasets import make_plr_CCDDHNR2018 + + +# @pytest.fixture(scope='module', +# params=[LinearRegression(), +# LassoCV()]) +# def learner(request): +# return request.param + +@pytest.fixture(scope='module', + params=['IV-type', 'partialling out']) +def score(request): + return request.param + +@pytest.fixture(scope='module', + params=['dml1', 'dml2']) +def dml_procedure(request): + return request.param + +@pytest.fixture(scope='module', + params=[1, 
3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(learner, score, dml_procedure): + ext_predictions = {'d': {}} + + x, y, d = make_plr_CCDDHNR2018(n_obs=500, + dim_x=20, + alpha=0.5, + return_type="np.array") + + lm_m1 = LinearRegression() + lm_l1 = LinearRegression() + + np.random.seed(3141) + lm_m1.fit(x, d) + ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + + lm_l1.fit(x, y) + ext_predictions['d']['ml_l'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + DMLPLR = DoubleMLPLR(obj_dml_data=dml_data, + ml_m=learner, + ml_l=learner, + score=score, + n_rep=n_rep, + dml_procedure=dml_procedure) + np.random.seed(3141) + DMLPLR.fit(store_predictions=True) + + DMLPLR_ext = DoubleMLPLR(obj_dml_data=dml_data, + ml_m=learner, + ml_l=learner, + score=score, + n_rep=n_rep, + dml_procedure=dml_procedure) + + np.random.seed(3141) + DMLPLR_ext.fit(external_predictions=ext_predictions) + + res_dict = {'coef_normal': DMLPLR.coef, + 'coef_ext': DMLPLR_ext.coef} + + return res_dict + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose(adapted_doubleml_fixture['coef_normal'], + adapted_doubleml_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file From f64fb575fb66224306881e18cfa25a021fd6265f Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 20 Jun 2023 15:53:18 +0200 Subject: [PATCH 020/134] new testcases / change dml.py --- doubleml/double_ml.py | 4 ++- doubleml/tests/test_external_predictions.py | 29 ++++++++++++--------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 8402080c..d2ec3d06 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -529,8 +529,10 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if 
external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray): + if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep > 1: ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] + elif isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep == 1: + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] else: ext_prediction_dict[learner] = None else: diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 43a0cba6..d1c681d7 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -5,7 +5,6 @@ from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 - # @pytest.fixture(scope='module', # params=[LinearRegression(), # LassoCV()]) @@ -29,7 +28,7 @@ def n_rep(request): @pytest.fixture(scope="module") -def adapted_doubleml_fixture(learner, score, dml_procedure): +def adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions = {'d': {}} x, y, d = make_plr_CCDDHNR2018(n_obs=500, @@ -37,30 +36,36 @@ def adapted_doubleml_fixture(learner, score, dml_procedure): alpha=0.5, return_type="np.array") - lm_m1 = LinearRegression() - lm_l1 = LinearRegression() + # lm_m1 = LinearRegression() + # lm_l1 = LinearRegression() np.random.seed(3141) - lm_m1.fit(x, d) - ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) - lm_l1.fit(x, y) - ext_predictions['d']['ml_l'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + # lm_m1.fit(x, d) + # ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) + + # lm_l1.fit(x, y) + # 
ext_predictions['d']['ml_l'] = np.stack([lm_l1.predict(x) for _ in range(n_rep)], axis=1) dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) DMLPLR = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=learner, - ml_l=learner, + ml_m=LinearRegression(), + ml_l=LinearRegression(), score=score, n_rep=n_rep, dml_procedure=dml_procedure) np.random.seed(3141) + DMLPLR.fit(store_predictions=True) + ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'].squeeze() + ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'].squeeze() + + DMLPLR_ext = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=learner, - ml_l=learner, + ml_m=LinearRegression(), + ml_l=LinearRegression(), score=score, n_rep=n_rep, dml_procedure=dml_procedure) From f72d6b984fd6e93a82a1d0700caab77f8b2f136e Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 27 Jun 2023 15:59:42 +0200 Subject: [PATCH 021/134] Fix testcases for external predictions --- doubleml/tests/test_external_predictions.py | 53 +++++++++++---------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index d1c681d7..5aa09b61 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -5,11 +5,17 @@ from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 -# @pytest.fixture(scope='module', -# params=[LinearRegression(), -# LassoCV()]) -# def learner(request): -# return request.param +class dummy_learner: + _estimator_type = "regressor" + def fit(*args): + raise AttributeError("Accessed fit method!") + def predict(*args): + raise AttributeError("Accessed predict method!") + def set_params(*args): + raise AttributeError("Accessed set_params method!") + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method!") + @pytest.fixture(scope='module', params=['IV-type', 'partialling out']) @@ -36,25 +42,21 @@ def 
adapted_doubleml_fixture(score, dml_procedure, n_rep): alpha=0.5, return_type="np.array") - # lm_m1 = LinearRegression() - # lm_l1 = LinearRegression() - np.random.seed(3141) - # lm_m1.fit(x, d) - # ext_predictions['d']['ml_m'] = np.stack([lm_m1.predict(x) for _ in range(n_rep)], axis=1) - - # lm_l1.fit(x, y) - # ext_predictions['d']['ml_l'] = np.stack([lm_l1.predict(x) for _ in range(n_rep)], axis=1) - dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - DMLPLR = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=LinearRegression(), + kwargs = {'obj_dml_data': dml_data, + 'score': score, + 'n_rep': n_rep, + 'dml_procedure': dml_procedure} + + if score == 'IV-type': + kwargs['ml_g'] = LinearRegression() + + DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), - score=score, - n_rep=n_rep, - dml_procedure=dml_procedure) + **kwargs) np.random.seed(3141) DMLPLR.fit(store_predictions=True) @@ -62,13 +64,14 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'].squeeze() ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'].squeeze() + if score == 'IV-type': + kwargs['ml_g'] = dummy_learner() + ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'].squeeze() + - DMLPLR_ext = DoubleMLPLR(obj_dml_data=dml_data, - ml_m=LinearRegression(), - ml_l=LinearRegression(), - score=score, - n_rep=n_rep, - dml_procedure=dml_procedure) + DMLPLR_ext = DoubleMLPLR(ml_m=dummy_learner(), + ml_l=dummy_learner(), + **kwargs) np.random.seed(3141) DMLPLR_ext.fit(external_predictions=ext_predictions) From 731325a110d448c5172004cef710fb04d64a5045 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 10 Jul 2023 15:30:30 +0200 Subject: [PATCH 022/134] Add external prediction option to PLIV model --- doubleml/double_ml.py | 14 +-- doubleml/double_ml_pliv.py | 72 ++++++++----- doubleml/double_ml_plr.py | 4 +- .../tests/test_external_predictions_IV.py | 102 ++++++++++++++++++ 4 files changed, 158 insertions(+), 
34 deletions(-) create mode 100644 doubleml/tests/test_external_predictions_IV.py diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index d2ec3d06..9d0698e6 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -1054,13 +1054,13 @@ def _check_external_predictions(self, external_predictions): # ' and learner ' + str(learner) + '. ' + # f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') - expected_shape = (self._dml_data.n_obs, self.n_rep) - if external_predictions[treatment][learner].shape != expected_shape: - raise ValueError('Invalid external_predictions. ' - f'The supplied predictions have to be of shape {str(expected_shape)}. ' - 'Invalid predictions for treatment ' + str(treatment) + - ' and learner ' + str(learner) + '. ' + - f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') + # expected_shape = (self._dml_data.n_obs, self.n_rep) + # if external_predictions[treatment][learner].shape != expected_shape: + # raise ValueError('Invalid external_predictions. ' + # f'The supplied predictions have to be of shape {str(expected_shape)}. ' + # 'Invalid predictions for treatment ' + str(treatment) + + # ' and learner ' + str(learner) + '. 
' + + # f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') def _initialize_arrays(self): psi = np.full((self._dml_data.n_obs, self.n_rep, self._dml_data.n_coefs), np.nan) diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index 1ad57d24..2b87321f 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -293,13 +293,13 @@ def set_ml_nuisance_params(self, learner, treat_var, params): super(DoubleMLPLIV, self).set_ml_nuisance_params(learner, treat_var, params) def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - if self.partialX & (not self.partialZ): - psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, return_models) + if self.partialX & (not self.partialZ): + psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, external_predictions, return_models) elif (not self.partialX) & self.partialZ: - psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, return_models) + psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, external_predictions, return_models) else: assert (self.partialX & self.partialZ) - psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, return_models) + psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, external_predictions, return_models) return psi_elements, preds @@ -318,16 +318,21 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ return res - def _nuisance_est_partial_x(self, smpls, n_jobs_cv, return_models=False): + def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) # nuisance l - l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_l'), 
method=self._predict_method['ml_l'], - return_models=return_models) + if external_predictions['ml_l'] is not None: + l_hat = {'preds': external_predictions['ml_l'], + 'targets': None, + 'models': None} + else: + l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], + return_models=return_models) _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) predictions = {'ml_l': l_hat['preds']} @@ -337,37 +342,54 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, return_models=False): if self._dml_data.n_instr == 1: # one instrument: just identified x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) - m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) + force_all_finite=False) + if external_predictions['ml_m'] is not None: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) predictions['ml_m'] = m_hat['preds'] targets['ml_m'] = m_hat['targets'] models['ml_m'] = m_hat['models'] else: # several instruments: 2SLS m_hat = {'preds': np.full((self._dml_data.n_obs, self._dml_data.n_instr), np.nan), + 'targets': [None] * self._dml_data.n_instr, 'models': [None] * self._dml_data.n_instr} - z = self._dml_data.z for i_instr in range(self._dml_data.n_instr): + z = self._dml_data.z x, this_z = check_X_y(x, z[:, i_instr], - force_all_finite=False) - res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), - method=self._predict_method['ml_m'], 
return_models=return_models) + force_all_finite=False) + if external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] is not None: + m_hat['preds'][:, i_instr] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] + targets['ml_m_' + self._dml_data.z_cols[i_instr]] = None + models['ml_m_' + self._dml_data.z_cols[i_instr]] = None + else: + res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), + method=self._predict_method['ml_m'], return_models=return_models) - m_hat['preds'][:, i_instr] = res_cv_predict['preds'] + m_hat['preds'][:, i_instr] = res_cv_predict['preds'] - predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['preds'] - targets['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['targets'] - models['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['models'] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['preds'] + targets['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['targets'] + models['ml_m_' + self._dml_data.z_cols[i_instr]] = res_cv_predict['models'] _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) # nuisance r - r_hat = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'], - return_models=return_models) + if external_predictions['ml_r'] is not None: + r_hat = {'preds': external_predictions['ml_r'], + 'targets': None, + 'models': None} + else: + r_hat = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_r'), method=self._predict_method['ml_r'], + return_models=return_models) _check_finite_predictions(r_hat['preds'], self._learner['ml_r'], 
'ml_r', smpls) predictions['ml_r'] = r_hat['preds'] targets['ml_r'] = r_hat['targets'] @@ -405,7 +427,7 @@ def _score_elements(self, y, z, d, l_hat, m_hat, r_hat, g_hat, smpls): # compute residuals u_hat = y - l_hat w_hat = d - r_hat - v_hat = z - m_hat + v_hat = z- m_hat r_hat_tilde = None if self._dml_data.n_instr > 1: diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index aa5b24df..ad1f0f1a 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -183,8 +183,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance l if external_predictions['ml_l'] is not None: l_hat = {'preds': external_predictions['ml_l'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py new file mode 100644 index 00000000..550704d2 --- /dev/null +++ b/doubleml/tests/test_external_predictions_IV.py @@ -0,0 +1,102 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV +from doubleml import DoubleMLPLIV, DoubleMLData +from doubleml.datasets import make_pliv_CHS2015 + +class dummy_learner: + _estimator_type = "regressor" + def fit(*args): + raise AttributeError("Accessed fit method!") + def predict(*args): + raise AttributeError("Accessed predict method!") + def set_params(*args): + raise AttributeError("Accessed set_params method!") + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method!") + + +@pytest.fixture(scope='module', + params=['IV-type', 'partialling out']) +def score(request): + return request.param + +@pytest.fixture(scope='module', + params=['dml1', 'dml2']) +def dml_procedure(request): + return request.param + 
+@pytest.fixture(scope='module', + params=[1]) +def n_rep(request): + return request.param + +@pytest.fixture(scope='module', + params=[1, 3]) +def dim_z(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(score, dml_procedure, n_rep): + ext_predictions = {'d': {}} + + data = make_pliv_CHS2015(n_obs=500, + dim_x=20, + alpha=0.5, + dim_z=dim_z, + return_type="DataFrame") + + np.random.seed(3141) + + dml_data = DoubleMLData(data, 'y', 'd', z_cols=[f"Z{i}" for i in range(1, dim_z+1)]) + + kwargs = {'obj_dml_data': dml_data, + 'score': score, + 'n_rep': n_rep, + 'dml_procedure': dml_procedure} + + if score == 'IV-type': + kwargs['ml_g'] = LinearRegression() + + DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), + ml_l=LinearRegression(), + ml_r=LinearRegression(), + **kwargs) + np.random.seed(3141) + + DMLPLIV.fit(store_predictions=True) + + ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() + ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() + + if dimz == 1: + ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() + else: + for instr in range(dimz): + ext_predictions['d']['ml_m_' + 'Z' + str(instr+1)] = DMLPLIV.predictions['ml_m_' + 'Z' + str(instr+1)].squeeze() + + if score == 'IV-type': + kwargs['ml_g'] = dummy_learner() + ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + + + DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), + ml_l=dummy_learner(), + ml_r=dummy_learner(), + **kwargs) + + np.random.seed(3141) + DMLPLR_ext.fit(external_predictions=ext_predictions) + + res_dict = {'coef_normal': DMLPLIV.coef, + 'coef_ext': DMLPLIV_ext.coef} + + return res_dict + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose(adapted_doubleml_fixture['coef_normal'], + adapted_doubleml_fixture['coef_ext'], + rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file From d96f28cf8178f5b9f2cfede01bd96cce501c1792 Mon Sep 17 
00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 10 Jul 2023 16:33:31 +0200 Subject: [PATCH 023/134] Fix PLIV model for IV-type score and add testcases --- doubleml/double_ml_pliv.py | 19 +-- .../tests/test_external_predictions_IV.py | 113 ++++++++++-------- 2 files changed, 72 insertions(+), 60 deletions(-) diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index 2b87321f..34d4fe29 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -399,13 +399,18 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return if (self._dml_data.n_instr == 1) & ('ml_g' in self._learner): # an estimate of g is obtained for the IV-type score and callable scores # get an initial estimate for theta using the partialling out score - psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) - psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) - theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) - # nuisance g - g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], - return_models=return_models) + if external_predictions['ml_g'] is not None: + g_hat = {'preds': external_predictions['ml_g'], + 'targets': None, + 'models': None} + else: + psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) + psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + # nuisance g + g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial * d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat['preds'], self._learner['ml_g'], 'ml_g', smpls) predictions['ml_g'] = g_hat['preds'] diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py index 550704d2..b92cd2e6 
100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -18,7 +18,7 @@ def get_params(*args, **kwargs): @pytest.fixture(scope='module', - params=['IV-type', 'partialling out']) + params=['partialling out', 'IV-type']) def score(request): return request.param @@ -28,7 +28,7 @@ def dml_procedure(request): return request.param @pytest.fixture(scope='module', - params=[1]) + params=[1, 3]) def n_rep(request): return request.param @@ -37,63 +37,70 @@ def n_rep(request): def dim_z(request): return request.param - @pytest.fixture(scope="module") -def adapted_doubleml_fixture(score, dml_procedure, n_rep): - ext_predictions = {'d': {}} - - data = make_pliv_CHS2015(n_obs=500, - dim_x=20, - alpha=0.5, - dim_z=dim_z, - return_type="DataFrame") - - np.random.seed(3141) - - dml_data = DoubleMLData(data, 'y', 'd', z_cols=[f"Z{i}" for i in range(1, dim_z+1)]) - - kwargs = {'obj_dml_data': dml_data, - 'score': score, - 'n_rep': n_rep, - 'dml_procedure': dml_procedure} - - if score == 'IV-type': - kwargs['ml_g'] = LinearRegression() - - DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), - ml_l=LinearRegression(), - ml_r=LinearRegression(), - **kwargs) - np.random.seed(3141) - - DMLPLIV.fit(store_predictions=True) - - ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() - ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() - - if dimz == 1: - ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() +def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): + + # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score + if dim_z > 1 and score == 'IV-type': + res_dict = {'coef_normal': 1, + 'coef_ext': 1} + + return res_dict else: - for instr in range(dimz): - ext_predictions['d']['ml_m_' + 'Z' + str(instr+1)] = DMLPLIV.predictions['ml_m_' + 'Z' + str(instr+1)].squeeze() + ext_predictions = {'d': {}} - if score == 'IV-type': - kwargs['ml_g'] = 
dummy_learner() - ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + data = make_pliv_CHS2015(n_obs=500, + dim_x=20, + alpha=0.5, + dim_z=dim_z, + return_type="DataFrame") + np.random.seed(3141) - DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), - ml_l=dummy_learner(), - ml_r=dummy_learner(), - **kwargs) + z_cols = [f"Z{i}" for i in range(1, dim_z+1)] + dml_data = DoubleMLData(data, 'y', 'd', z_cols=z_cols) - np.random.seed(3141) - DMLPLR_ext.fit(external_predictions=ext_predictions) - - res_dict = {'coef_normal': DMLPLIV.coef, - 'coef_ext': DMLPLIV_ext.coef} - - return res_dict + kwargs = {'obj_dml_data': dml_data, + 'score': score, + 'n_rep': n_rep, + 'dml_procedure': dml_procedure} + + if score == 'IV-type': + kwargs['ml_g'] = LinearRegression() + + DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), + ml_l=LinearRegression(), + ml_r=LinearRegression(), + **kwargs) + np.random.seed(3141) + + DMLPLIV.fit(store_predictions=True) + + ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() + ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() + + if dim_z == 1: + ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() + if score == 'IV-type': + kwargs['ml_g'] = dummy_learner() + ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + else: + for instr in range(dim_z): + ml_m_key = 'ml_m_' + 'Z' + str(instr+1) + ext_predictions['d'][ml_m_key] = DMLPLIV.predictions[ml_m_key].squeeze() + + DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), + ml_l=dummy_learner(), + ml_r=dummy_learner(), + **kwargs) + + np.random.seed(3141) + DMLPLIV_ext.fit(external_predictions=ext_predictions) + + res_dict = {'coef_normal': DMLPLIV.coef, + 'coef_ext': DMLPLIV_ext.coef} + + return res_dict @pytest.mark.ci def test_adapted_doubleml_coef(adapted_doubleml_fixture): From 102b27a6e13cb210a07d6c2712912d49f8b8219c Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Mon, 10 Jul 2023 20:52:23 +0200 Subject: [PATCH 
024/134] Added external prediction option to DoubleMLDID --- doubleml/double_ml_did.py | 40 ++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 41317545..c035e560 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -201,31 +201,45 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g # get train indices for d == 0 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) - g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + if external_predictions['ml_g0'] is not None: + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) - _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples - g_hat0['targets'] = g_hat0['targets'].astype(float) - g_hat0['targets'][d == 1] = np.nan - + g_hat0['targets'] = g_hat0['targets'].astype(float) # is None if external predictions are used + g_hat0['targets'][d == 1] = np.nan + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # only relevant for observational or experimental setting m_hat = {'preds': None, 'targets': None, 'models': None} g_hat1 = {'preds': None, 'targets': None, 'models': None} if self.score == 'observational': # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) + if external_predictions['ml_m'] is not None: + 
m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) if self.score == 'experimental': - g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + if external_predictions['ml_g1'] is not None: + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples From d90dc8fc96fa83464337eec38e6c229e90c878cf Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 19 Jul 2023 14:22:01 +0200 Subject: [PATCH 025/134] Update test_external_predictions_IV.py --- doubleml/tests/test_external_predictions_IV.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py index b92cd2e6..e871c102 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -90,8 +90,8 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): ext_predictions['d'][ml_m_key] = DMLPLIV.predictions[ml_m_key].squeeze() DMLPLIV_ext = 
DoubleMLPLIV(ml_m=dummy_learner(), - ml_l=dummy_learner(), - ml_r=dummy_learner(), + ml_l=dummy_learner(), + ml_r=dummy_learner(), **kwargs) np.random.seed(3141) From af0c039f3c596d5d873a1be76f28e7f3ba5929a6 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:41:13 +0200 Subject: [PATCH 026/134] add restriction to external predictions (matrix) --- .gitignore | 1 + doubleml/double_ml.py | 36 +++++++++++++++++------------------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 41a409eb..d9ffb93c 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ share/python-wheels/ MANIFEST *.idea *.vscode +.flake8 diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 6f9f471e..217c802e 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -561,10 +561,8 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, if external_predictions is None: ext_prediction_dict[learner] = None elif learner in external_predictions[self._dml_data.d_cols[i_d]].keys(): - if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep > 1: - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][0:, i_rep] - elif isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray) and self.n_rep == 1: - ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner] + if isinstance(external_predictions[self._dml_data.d_cols[i_d]][learner], np.ndarray): + ext_prediction_dict[learner] = external_predictions[self._dml_data.d_cols[i_d]][learner][:, i_rep] else: ext_prediction_dict[learner] = None else: @@ -1090,21 +1088,21 @@ def _check_external_predictions(self, external_predictions): f'Invalid nuisance learner for treatment {str(treatment)} in {str(supplied_learners)}. 
' 'Valid nuisance learners ' + ' or '.join(valid_learners) + '.') - # for learner in supplied_learners: - # if not isinstance(external_predictions[treatment][learner], np.ndarray): - # raise TypeError('Invalid external_predictions. ' - # 'The values of the nested list must be a numpy array. ' - # 'Invalid predictions for treatment ' + str(treatment) + - # ' and learner ' + str(learner) + '. ' + - # f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') - - # expected_shape = (self._dml_data.n_obs, self.n_rep) - # if external_predictions[treatment][learner].shape != expected_shape: - # raise ValueError('Invalid external_predictions. ' - # f'The supplied predictions have to be of shape {str(expected_shape)}. ' - # 'Invalid predictions for treatment ' + str(treatment) + - # ' and learner ' + str(learner) + '. ' + - # f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') + for learner in supplied_learners: + if not isinstance(external_predictions[treatment][learner], np.ndarray): + raise TypeError('Invalid external_predictions. ' + 'The values of the nested list must be a numpy array. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. ' + + f'Object of type {str(type(external_predictions[treatment][learner]))} was passed.') + + expected_shape = (self._dml_data.n_obs, self.n_rep) + if external_predictions[treatment][learner].shape != expected_shape: + raise ValueError('Invalid external_predictions. ' + f'The supplied predictions have to be of shape {str(expected_shape)}. ' + 'Invalid predictions for treatment ' + str(treatment) + + ' and learner ' + str(learner) + '. 
' + + f'Predictions of shape {str(external_predictions[treatment][learner].shape)} passed.') def _initialize_arrays(self): # scores From a3f218ca3ca6d67230015f79d1b62efc14df4199 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 19 Jul 2023 17:41:31 +0200 Subject: [PATCH 027/134] fix unit tests --- doubleml/double_ml_pliv.py | 4 +- doubleml/tests/test_doubleml_exceptions.py | 17 +-- doubleml/tests/test_external_predictions.py | 10 +- .../tests/test_external_predictions_IV.py | 111 ++++++++++-------- 4 files changed, 76 insertions(+), 66 deletions(-) diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index eb14f946..e16caa94 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -287,10 +287,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if self.partialX & (not self.partialZ): psi_elements, preds = self._nuisance_est_partial_x(smpls, n_jobs_cv, external_predictions, return_models) elif (not self.partialX) & self.partialZ: - psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, external_predictions, return_models) + psi_elements, preds = self._nuisance_est_partial_z(smpls, n_jobs_cv, return_models) else: assert (self.partialX & self.partialZ) - psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, external_predictions, return_models) + psi_elements, preds = self._nuisance_est_partial_xz(smpls, n_jobs_cv, return_models) return psi_elements, preds diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index 3958f35a..57b3d32c 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -1328,11 +1328,6 @@ def test_double_ml_external_predictions(): with pytest.raises(TypeError, match=msg): dml_irm_obj.fit(external_predictions="ml_m") - predictions = {'ml_f': 'test'} - msg = "external_predictions is not yet implmented 
for ``n_rep > 1``." - with pytest.raises(NotImplementedError, match=msg): - dml_irm_obj.fit(external_predictions=predictions) - dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), ml_m=LogisticRegression(), @@ -1377,15 +1372,23 @@ def test_double_ml_external_predictions(): predictions = {'d': {'ml_m': np.array([0])}} msg = ('Invalid external_predictions. ' - r'The supplied predictions have to be of shape \(100,\). ' + r'The supplied predictions have to be of shape \(100, 1\). ' 'Invalid predictions for treatment d and learner ml_m. ' r'Predictions of shape \(1,\) passed.') with pytest.raises(ValueError, match=msg): dml_irm_obj.fit(external_predictions=predictions) + predictions = {'d': {'ml_m': np.zeros(100)}} + msg = ('Invalid external_predictions. ' + r'The supplied predictions have to be of shape \(100, 1\). ' + 'Invalid predictions for treatment d and learner ml_m. ' + r'Predictions of shape \(100,\) passed.') + with pytest.raises(ValueError, match=msg): + dml_irm_obj.fit(external_predictions=predictions) + predictions = {'d': {'ml_m': np.ones(shape=(5, 3))}} msg = ('Invalid external_predictions. ' - r'The supplied predictions have to be of shape \(100,\). ' + r'The supplied predictions have to be of shape \(100, 1\). ' 'Invalid predictions for treatment d and learner ml_m. 
' r'Predictions of shape \(5, 3\) passed.') with pytest.raises(ValueError, match=msg): diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 5aa09b61..07d408ee 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -50,10 +50,10 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): 'score': score, 'n_rep': n_rep, 'dml_procedure': dml_procedure} - + if score == 'IV-type': kwargs['ml_g'] = LinearRegression() - + DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) @@ -61,12 +61,12 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): DMLPLR.fit(store_predictions=True) - ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'].squeeze() - ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'].squeeze() + ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'][:, :, 0] + ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'][:, :, 0] if score == 'IV-type': kwargs['ml_g'] = dummy_learner() - ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'].squeeze() + ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'][:, :, 0] DMLPLR_ext = DoubleMLPLR(ml_m=dummy_learner(), diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_external_predictions_IV.py index e871c102..28e7feb6 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -5,105 +5,112 @@ from doubleml import DoubleMLPLIV, DoubleMLData from doubleml.datasets import make_pliv_CHS2015 + class dummy_learner: _estimator_type = "regressor" + def fit(*args): raise AttributeError("Accessed fit method!") + def predict(*args): raise AttributeError("Accessed predict method!") + def set_params(*args): raise AttributeError("Accessed set_params method!") + def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method!") - -@pytest.fixture(scope='module', - 
params=['partialling out', 'IV-type']) + +@pytest.fixture(scope="module", params=["partialling out", "IV-type"]) def score(request): return request.param -@pytest.fixture(scope='module', - params=['dml1', 'dml2']) + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) def dml_procedure(request): return request.param -@pytest.fixture(scope='module', - params=[1, 3]) + +@pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param -@pytest.fixture(scope='module', - params=[1, 3]) + +@pytest.fixture(scope="module", params=[1, 3]) def dim_z(request): return request.param + @pytest.fixture(scope="module") def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): - # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score - if dim_z > 1 and score == 'IV-type': - res_dict = {'coef_normal': 1, - 'coef_ext': 1} - + if dim_z > 1 and score == "IV-type": + res_dict = {"coef_normal": 1, "coef_ext": 1} + return res_dict else: - ext_predictions = {'d': {}} + ext_predictions = {"d": {}} - data = make_pliv_CHS2015(n_obs=500, - dim_x=20, - alpha=0.5, - dim_z=dim_z, - return_type="DataFrame") + data = make_pliv_CHS2015( + n_obs=500, dim_x=20, alpha=0.5, dim_z=dim_z, return_type="DataFrame" + ) np.random.seed(3141) - z_cols = [f"Z{i}" for i in range(1, dim_z+1)] - dml_data = DoubleMLData(data, 'y', 'd', z_cols=z_cols) - - kwargs = {'obj_dml_data': dml_data, - 'score': score, - 'n_rep': n_rep, - 'dml_procedure': dml_procedure} - - if score == 'IV-type': - kwargs['ml_g'] = LinearRegression() - - DMLPLIV = DoubleMLPLIV(ml_m=LinearRegression(), - ml_l=LinearRegression(), - ml_r=LinearRegression(), - **kwargs) + z_cols = [f"Z{i}" for i in range(1, dim_z + 1)] + dml_data = DoubleMLData(data, "y", "d", z_cols=z_cols) + + kwargs = { + "obj_dml_data": dml_data, + "score": score, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + } + + if score == "IV-type": + kwargs["ml_g"] = LinearRegression() + + DMLPLIV = 
DoubleMLPLIV( + ml_m=LinearRegression(), + ml_l=LinearRegression(), + ml_r=LinearRegression(), + **kwargs, + ) np.random.seed(3141) DMLPLIV.fit(store_predictions=True) - ext_predictions['d']['ml_l'] = DMLPLIV.predictions['ml_l'].squeeze() - ext_predictions['d']['ml_r'] = DMLPLIV.predictions['ml_r'].squeeze() + ext_predictions["d"]["ml_l"] = DMLPLIV.predictions["ml_l"][:, :, 0] + ext_predictions["d"]["ml_r"] = DMLPLIV.predictions["ml_r"][:, :, 0] if dim_z == 1: - ext_predictions['d']['ml_m'] = DMLPLIV.predictions['ml_m'].squeeze() - if score == 'IV-type': - kwargs['ml_g'] = dummy_learner() - ext_predictions['d']['ml_g'] = DMLPLIV.predictions['ml_g'].squeeze() + ext_predictions["d"]["ml_m"] = DMLPLIV.predictions["ml_m"][:, :, 0] + if score == "IV-type": + kwargs["ml_g"] = dummy_learner() + ext_predictions["d"]["ml_g"] = DMLPLIV.predictions["ml_g"][:, :, 0] else: for instr in range(dim_z): - ml_m_key = 'ml_m_' + 'Z' + str(instr+1) - ext_predictions['d'][ml_m_key] = DMLPLIV.predictions[ml_m_key].squeeze() + ml_m_key = "ml_m_" + "Z" + str(instr + 1) + ext_predictions["d"][ml_m_key] = DMLPLIV.predictions[ml_m_key][:, :, 0] - DMLPLIV_ext = DoubleMLPLIV(ml_m=dummy_learner(), - ml_l=dummy_learner(), - ml_r=dummy_learner(), - **kwargs) + DMLPLIV_ext = DoubleMLPLIV( + ml_m=dummy_learner(), ml_l=dummy_learner(), ml_r=dummy_learner(), **kwargs + ) np.random.seed(3141) DMLPLIV_ext.fit(external_predictions=ext_predictions) - res_dict = {'coef_normal': DMLPLIV.coef, - 'coef_ext': DMLPLIV_ext.coef} + res_dict = {"coef_normal": DMLPLIV.coef, "coef_ext": DMLPLIV_ext.coef} return res_dict + @pytest.mark.ci def test_adapted_doubleml_coef(adapted_doubleml_fixture): - assert math.isclose(adapted_doubleml_fixture['coef_normal'], - adapted_doubleml_fixture['coef_ext'], - rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], + adapted_doubleml_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4, + ) From 
0fca1366be78e07d40b273e0d275206f21a42323 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Sep 2023 12:08:03 +0200 Subject: [PATCH 028/134] add `dummy_learners` into a new `utils` submodule --- doubleml/tests/test_external_predictions.py | 18 +++-------- .../tests/test_external_predictions_IV.py | 21 ++----------- doubleml/utils/__init__.py | 7 +++++ doubleml/utils/dummy_learners.py | 30 +++++++++++++++++++ 4 files changed, 44 insertions(+), 32 deletions(-) create mode 100644 doubleml/utils/__init__.py create mode 100644 doubleml/utils/dummy_learners.py diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 07d408ee..26043da3 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -4,17 +4,7 @@ from sklearn.linear_model import LinearRegression, LassoCV from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 - -class dummy_learner: - _estimator_type = "regressor" - def fit(*args): - raise AttributeError("Accessed fit method!") - def predict(*args): - raise AttributeError("Accessed predict method!") - def set_params(*args): - raise AttributeError("Accessed set_params method!") - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method!") +from doubleml.utils import dummy_regressor @pytest.fixture(scope='module', @@ -65,12 +55,12 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'][:, :, 0] if score == 'IV-type': - kwargs['ml_g'] = dummy_learner() + kwargs['ml_g'] = dummy_regressor() ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'][:, :, 0] - DMLPLR_ext = DoubleMLPLR(ml_m=dummy_learner(), - ml_l=dummy_learner(), + DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), + ml_l=dummy_regressor(), **kwargs) np.random.seed(3141) diff --git a/doubleml/tests/test_external_predictions_IV.py 
b/doubleml/tests/test_external_predictions_IV.py index 28e7feb6..5563ef90 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_external_predictions_IV.py @@ -4,22 +4,7 @@ from sklearn.linear_model import LinearRegression, LassoCV from doubleml import DoubleMLPLIV, DoubleMLData from doubleml.datasets import make_pliv_CHS2015 - - -class dummy_learner: - _estimator_type = "regressor" - - def fit(*args): - raise AttributeError("Accessed fit method!") - - def predict(*args): - raise AttributeError("Accessed predict method!") - - def set_params(*args): - raise AttributeError("Accessed set_params method!") - - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method!") +from doubleml.utils import dummy_regressor @pytest.fixture(scope="module", params=["partialling out", "IV-type"]) @@ -87,7 +72,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): if dim_z == 1: ext_predictions["d"]["ml_m"] = DMLPLIV.predictions["ml_m"][:, :, 0] if score == "IV-type": - kwargs["ml_g"] = dummy_learner() + kwargs["ml_g"] = dummy_regressor() ext_predictions["d"]["ml_g"] = DMLPLIV.predictions["ml_g"][:, :, 0] else: for instr in range(dim_z): @@ -95,7 +80,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): ext_predictions["d"][ml_m_key] = DMLPLIV.predictions[ml_m_key][:, :, 0] DMLPLIV_ext = DoubleMLPLIV( - ml_m=dummy_learner(), ml_l=dummy_learner(), ml_r=dummy_learner(), **kwargs + ml_m=dummy_regressor(), ml_l=dummy_regressor(), ml_r=dummy_regressor(), **kwargs ) np.random.seed(3141) diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py new file mode 100644 index 00000000..b3fbb9f0 --- /dev/null +++ b/doubleml/utils/__init__.py @@ -0,0 +1,7 @@ +from .dummy_learners import dummy_classifier +from .dummy_learners import dummy_regressor + +__all__ = [ + "dummy_classifier", + "dummy_regressor", +] diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py new file 
mode 100644 index 00000000..11e5caf1 --- /dev/null +++ b/doubleml/utils/dummy_learners.py @@ -0,0 +1,30 @@ +class dummy_regressor: + _estimator_type = "regressor" + + def fit(*args): + raise AttributeError("Accessed fit method of dummy_regressor!") + + def predict(*args): + raise AttributeError("Accessed predict method of dummy_regressor!") + + def set_params(*args): + raise AttributeError("Accessed set_params method of dummy_regressor!") + + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method of dummy_regressor!") + + +class dummy_classifier: + _estimator_type = "classifier" + + def fit(*args): + raise AttributeError("Accessed fit method of dummy_classifier!") + + def predict(*args): + raise AttributeError("Accessed predict method of dummy_classifier!") + + def set_params(*args): + raise AttributeError("Accessed set_params method of dummy_classifier!") + + def get_params(*args, **kwargs): + raise AttributeError("Accessed get_params method of dummy_classifier!") From b1aa16afbca336143186ecb6a5e279fbfc8a71ef Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Sep 2023 12:08:29 +0200 Subject: [PATCH 029/134] code formatting --- doubleml/tests/test_external_predictions.py | 58 ++++++++------------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index 26043da3..d0ba02a3 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -5,74 +5,62 @@ from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 from doubleml.utils import dummy_regressor - -@pytest.fixture(scope='module', - params=['IV-type', 'partialling out']) + +@pytest.fixture(scope="module", params=["IV-type", "partialling out"]) def score(request): return request.param -@pytest.fixture(scope='module', - params=['dml1', 'dml2']) + +@pytest.fixture(scope="module", 
params=["dml1", "dml2"]) def dml_procedure(request): return request.param -@pytest.fixture(scope='module', - params=[1, 3]) + +@pytest.fixture(scope="module", params=[1, 3]) def n_rep(request): return request.param @pytest.fixture(scope="module") def adapted_doubleml_fixture(score, dml_procedure, n_rep): - ext_predictions = {'d': {}} + ext_predictions = {"d": {}} - x, y, d = make_plr_CCDDHNR2018(n_obs=500, - dim_x=20, - alpha=0.5, - return_type="np.array") + x, y, d = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array") np.random.seed(3141) dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - kwargs = {'obj_dml_data': dml_data, - 'score': score, - 'n_rep': n_rep, - 'dml_procedure': dml_procedure} + kwargs = {"obj_dml_data": dml_data, "score": score, "n_rep": n_rep, "dml_procedure": dml_procedure} - if score == 'IV-type': - kwargs['ml_g'] = LinearRegression() + if score == "IV-type": + kwargs["ml_g"] = LinearRegression() - DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), - ml_l=LinearRegression(), - **kwargs) + DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) np.random.seed(3141) DMLPLR.fit(store_predictions=True) - ext_predictions['d']['ml_m'] = DMLPLR.predictions['ml_m'][:, :, 0] - ext_predictions['d']['ml_l'] = DMLPLR.predictions['ml_l'][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] - if score == 'IV-type': - kwargs['ml_g'] = dummy_regressor() - ext_predictions['d']['ml_g'] = DMLPLR.predictions['ml_g'][:, :, 0] + if score == "IV-type": + kwargs["ml_g"] = dummy_regressor() + ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] - - DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), - ml_l=dummy_regressor(), - **kwargs) + DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), ml_l=dummy_regressor(), **kwargs) np.random.seed(3141) DMLPLR_ext.fit(external_predictions=ext_predictions) - res_dict = 
{'coef_normal': DMLPLR.coef, - 'coef_ext': DMLPLR_ext.coef} + res_dict = {"coef_normal": DMLPLR.coef, "coef_ext": DMLPLR_ext.coef} return res_dict + @pytest.mark.ci def test_adapted_doubleml_coef(adapted_doubleml_fixture): - assert math.isclose(adapted_doubleml_fixture['coef_normal'], - adapted_doubleml_fixture['coef_ext'], - rel_tol=1e-9, abs_tol=1e-4) \ No newline at end of file + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], adapted_doubleml_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 + ) From e0e8c154adb0af8f79c6c83481d36812a14ee043 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 14 Sep 2023 10:25:55 +0200 Subject: [PATCH 030/134] Update dummy_learners.py to allow the get / set params method --- doubleml/utils/dummy_learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 11e5caf1..a15ae441 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -8,10 +8,10 @@ def predict(*args): raise AttributeError("Accessed predict method of dummy_regressor!") def set_params(*args): - raise AttributeError("Accessed set_params method of dummy_regressor!") + print("\n\nAccessed set_params method of dummy_regressor!\n\n") def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method of dummy_regressor!") + print("\n\nAccessed get_params method of dummy_regressor!\n\n") class dummy_classifier: From 0b45b542973509e336e99f04f77fa3325dee5cef Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 14 Sep 2023 11:01:47 +0200 Subject: [PATCH 031/134] Redo changes --- doubleml/utils/dummy_learners.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index a15ae441..e29742f8 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -8,10 +8,10 @@ def predict(*args): raise 
AttributeError("Accessed predict method of dummy_regressor!") def set_params(*args): - print("\n\nAccessed set_params method of dummy_regressor!\n\n") + raise AttributeError("Accessed set_params method of dummy_regressor!) def get_params(*args, **kwargs): - print("\n\nAccessed get_params method of dummy_regressor!\n\n") + raise AttributeError("Accessed get_params method of dummy_regressor!") class dummy_classifier: From 03b0831f96baedf4f7c6810463886bbd183b13d8 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 14 Sep 2023 11:02:00 +0200 Subject: [PATCH 032/134] typo --- doubleml/utils/dummy_learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index e29742f8..11e5caf1 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -8,7 +8,7 @@ def predict(*args): raise AttributeError("Accessed predict method of dummy_regressor!") def set_params(*args): - raise AttributeError("Accessed set_params method of dummy_regressor!) 
+ raise AttributeError("Accessed set_params method of dummy_regressor!") def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method of dummy_regressor!") From b2808e15d1594c26f485b6ee7532e595c52b96db Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 18 Sep 2023 14:51:40 +0200 Subject: [PATCH 033/134] update badges --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4431b5ab..f91eff54 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Conda Version](https://img.shields.io/conda/vn/conda-forge/doubleml.svg)](https://anaconda.org/conda-forge/doubleml) [![codecov](https://codecov.io/gh/DoubleML/doubleml-for-py/branch/main/graph/badge.svg?token=0BjlFPgdGk)](https://codecov.io/gh/DoubleML/doubleml-for-py) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/1c08ec7d782c451784293c996537de14)](https://www.codacy.com/gh/DoubleML/doubleml-for-py/dashboard?utm_source=github.com&utm_medium=referral&utm_content=DoubleML/doubleml-for-py&utm_campaign=Badge_Grade) -[![Python version](https://img.shields.io/badge/python-3.6%20%7C%203.7%20%7C%203.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/) +[![Python version](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue)](https://www.python.org/) The Python package **DoubleML** provides an implementation of the double / debiased machine learning framework of [Chernozhukov et al. (2018)](https://doi.org/10.1111/ectj.12097). 
From 681da5a8121a35f0edd383b8cb9359a3cc6a6b3e Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 18 Sep 2023 14:51:53 +0200 Subject: [PATCH 034/134] increment dev version --- doc/conf.py | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 29f6a63d..9b71a148 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -22,7 +22,7 @@ author = 'Bach, P., Chernozhukov, V., Kurz, M. S., and Spindler, M.' # The full version, including alpha/beta/rc tags -release = '0.6.dev0' +release = '0.8.dev0' # -- General configuration --------------------------------------------------- diff --git a/setup.py b/setup.py index 77cd729f..3749fcb5 100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ setup( name='DoubleML', - version='0.7.dev0', + version='0.8.dev0', author='Bach, P., Chernozhukov, V., Kurz, M. S., and Spindler, M.', maintainer='Malte S. Kurz', maintainer_email='malte.simon.kurz@uni-hamburg.de', From 101ef52582798a24876ec3c0b3ba0c35385b45b1 Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 1 Nov 2023 16:25:23 -0700 Subject: [PATCH 035/134] insert weights --- doubleml/double_ml_irm.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 4bbe42f7..c13cff10 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -12,7 +12,7 @@ from .double_ml_score_mixins import LinearScoreMixin from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw -from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer +from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer, _check_weights class DoubleMLIRM(LinearScoreMixin, DoubleML): @@ -47,6 +47,11 @@ class 
DoubleMLIRM(LinearScoreMixin, DoubleML): or a callable object / function with signature ``psi_a, psi_b = score(y, d, g_hat0, g_hat1, m_hat, smpls)``. Default is ``'ATE'``. + weights : array or None + An numpy array of weights for each individual observation. If None, then the ``'ATE'`` score + is applied. Can only be used with ``score = 'ATE'``. + Default is ``None``. + dml_procedure : str A str (``'dml1'`` or ``'dml2'``) specifying the double machine learning algorithm. Default is ``'dml2'``. @@ -118,6 +123,7 @@ def __init__(self, n_folds=5, n_rep=1, score='ATE', + weights=None, dml_procedure='dml2', normalize_ipw=False, trimming_rule='truncate', @@ -160,6 +166,11 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True + + _check_weights(weights, score, obj_dml_data.n_obs, obj_dml_data.n_treat) + if weights is None: + weights = np.ones((obj_dml_data.n_obs, obj_dml_data.n_treat)) + self._weights = weights @property def normalize_ipw(self): @@ -181,6 +192,13 @@ def trimming_threshold(self): Specifies the used trimming threshold. """ return self._trimming_threshold + + @property + def weights(self): + """ + Specifies the weights for a weighted ATE. + """ + return self._weights def _initialize_ml_nuisance_params(self): valid_learner = ['ml_g0', 'ml_g1', 'ml_m'] From 135d37ca053825bece9c976afd2845ed9dce66dd Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 1 Nov 2023 16:25:47 -0700 Subject: [PATCH 036/134] add check for weights --- doubleml/_utils_checks.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/doubleml/_utils_checks.py b/doubleml/_utils_checks.py index b4425b52..250ed394 100644 --- a/doubleml/_utils_checks.py +++ b/doubleml/_utils_checks.py @@ -226,3 +226,17 @@ def _check_benchmarks(benchmarks): raise TypeError('benchmarks name must be of string type. 
' f'{str(benchmarks["name"][i])} of type {str(type(benchmarks["name"][i]))} was passed.') return + +def _check_weights(weights, score, n_obs, n_treat): + if weights is not None: + if score != "ATE": + raise NotImplementedError("weights can only be set for score type 'ATE'. " + f"{score} was passed.") + if not isinstance(weights, np.ndarray): + raise ValueError("weights must be a numpy array. " + f"weights of type {str(type(weights))} was passed.") + if not np.all((0 <= weights) & (weights <= 1)): + raise ValueError("All weights values must be between 0 and 1") + if len(weights.shape) != 2 or weights.shape[0] != n_treat or weights.shape[1] != n_obs: + raise ValueError(f"weights must have shape ({n_treat},{n_obs}). " + f"weights of shape {weights.shape} was passed.") From dd244399fab6eda648d8a0ad9ffb4796b75d2824 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 15:49:00 +0100 Subject: [PATCH 037/134] Refact. Unit Test for ext. predictions --- doubleml/tests/test_external_predictions.py | 63 +++++++++++++++++---- 1 file changed, 52 insertions(+), 11 deletions(-) diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_external_predictions.py index d0ba02a3..f6409d40 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_external_predictions.py @@ -1,14 +1,18 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV -from doubleml import DoubleMLPLR, DoubleMLData -from doubleml.datasets import make_plr_CCDDHNR2018 -from doubleml.utils import dummy_regressor +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLData +from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier @pytest.fixture(scope="module", params=["IV-type", "partialling out"]) -def score(request): +def plr_score(request): + return 
request.param + +@pytest.fixture(scope="module", params=["ATE", "ATTE"]) +def irm_score(request): return request.param @@ -23,7 +27,7 @@ def n_rep(request): @pytest.fixture(scope="module") -def adapted_doubleml_fixture(score, dml_procedure, n_rep): +def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): ext_predictions = {"d": {}} x, y, d = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array") @@ -32,9 +36,9 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - kwargs = {"obj_dml_data": dml_data, "score": score, "n_rep": n_rep, "dml_procedure": dml_procedure} + kwargs = {"obj_dml_data": dml_data, "score": plr_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - if score == "IV-type": + if plr_score == "IV-type": kwargs["ml_g"] = LinearRegression() DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) @@ -45,7 +49,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] - if score == "IV-type": + if plr_score == "IV-type": kwargs["ml_g"] = dummy_regressor() ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] @@ -59,8 +63,45 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep): return res_dict +@pytest.fixture(scope="module") +def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") + + np.random.seed(3141) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + DMLIRM = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLIRM.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = 
DMLIRM.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + + DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLIRM_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": DMLIRM_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_plr_coef(doubleml_plr_fixture): + assert math.isclose( + doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 + ) + @pytest.mark.ci -def test_adapted_doubleml_coef(adapted_doubleml_fixture): +def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose( - adapted_doubleml_fixture["coef_normal"], adapted_doubleml_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 + doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 ) From 7f698074db3cc96aaa1b8c5bc0aea2f79cec569f Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 15:49:12 +0100 Subject: [PATCH 038/134] Unit tests for IRM model --- doubleml/utils/dummy_learners.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 11e5caf1..e4c08767 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -28,3 +28,6 @@ def set_params(*args): def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method of dummy_classifier!") + + def predict_proba(*args, **kwargs): + raise AttributeError("Accessed predict_proba method of dummy_classifier!") From d2ce02c0fd5661b79caa19bb404562b73dd03720 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 16:09:30 +0100 Subject: [PATCH 039/134] Impl. 
and Unit Tetsts for DID external predictions --- doubleml/double_ml_did.py | 2 +- .../tests/test_external_predictions_did.py | 56 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) create mode 100644 doubleml/tests/test_external_predictions_did.py diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index c76cee98..87c02931 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -218,7 +218,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # adjust target values to consider only compatible subsamples g_hat0['targets'] = g_hat0['targets'].astype(float) g_hat0['targets'][d == 1] = np.nan - + # nuisance g for d==1 if external_predictions['ml_g1'] is not None: g_hat1 = {'preds': external_predictions['ml_g1'], diff --git a/doubleml/tests/test_external_predictions_did.py b/doubleml/tests/test_external_predictions_did.py new file mode 100644 index 00000000..9e14de6d --- /dev/null +++ b/doubleml/tests/test_external_predictions_did.py @@ -0,0 +1,56 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLData, DoubleMLDID +from doubleml.datasets import make_did_SZ2020 +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=['observational', 'experimental']) +def did_score(request): + return request.param + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + np.random.seed(3141) + + dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") + + kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + 
DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLDID.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] + + DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLDID_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} + + return res_dict + +@pytest.mark.ci +def test_doubleml_did_coef(doubleml_did_fixture): + assert math.isclose( + doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) From 91e481aeecc8f33905bc13c3bfebf9aacc5b95a2 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 16:31:18 +0100 Subject: [PATCH 040/134] dummy_learners inherit from sklearn BaseEstimator --- doubleml/utils/dummy_learners.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index e4c08767..6043413f 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -1,4 +1,6 @@ -class dummy_regressor: +from sklearn.base import BaseEstimator + +class dummy_regressor(BaseEstimator): _estimator_type = "regressor" def fit(*args): @@ -14,7 +16,7 @@ def get_params(*args, **kwargs): raise AttributeError("Accessed get_params method of dummy_regressor!") -class dummy_classifier: +class dummy_classifier(BaseEstimator): _estimator_type = "classifier" def fit(*args): From 8ae786758782d5fd7d00fc51d00c811318527c02 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 14 Nov 2023 16:31:38 +0100 Subject: [PATCH 041/134] Impl. and Unit Tetsts for DIDCS external preds. 
--- doubleml/double_ml_did_cs.py | 80 ++++++++++++------- .../tests/test_external_predictions_did.py | 41 +++++++++- 2 files changed, 91 insertions(+), 30 deletions(-) diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py index 53910946..f0986eed 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py @@ -228,40 +228,62 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g smpls_d0_t0, smpls_d0_t1, smpls_d1_t0, smpls_d1_t1 = _get_cond_smpls_2d(smpls, d, t) - - g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float) - g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan - - g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float) - g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan - - g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float) - g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan - - g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], - return_models=return_models) - g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float) - g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan + if external_predictions['ml_g_d0_t0'] is not None: + g_hat_d0_t0 = {'preds': 
external_predictions['ml_g_d0_t0'], + 'targets': None, + 'models': None} + else: + g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], + return_models=return_models) + + g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float) + g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan + if external_predictions['ml_g_d0_t1'] is not None: + g_hat_d0_t1 = {'preds': external_predictions['ml_g_d0_t1'], + 'targets': None, + 'models': None} + else: + g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float) + g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan + if external_predictions['ml_g_d1_t0'] is not None: + g_hat_d1_t0 = {'preds': external_predictions['ml_g_d1_t0'], + 'targets': None, + 'models': None} + else: + g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float) + g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan + if external_predictions['ml_g_d1_t1'] is not None: + g_hat_d1_t1 = {'preds': external_predictions['ml_g_d1_t1'], + 'targets': None, + 'models': None} + else: + g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float) + g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan # only relevant for observational or experimental setting m_hat = {'preds': None, 'targets': 
None, 'models': None} if self.score == 'observational': # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) - _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + if external_predictions['ml_m'] is not None: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) m_hat['preds'] = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) psi_a, psi_b = self._score_elements(y, d, t, diff --git a/doubleml/tests/test_external_predictions_did.py b/doubleml/tests/test_external_predictions_did.py index 9e14de6d..308d9e8b 100644 --- a/doubleml/tests/test_external_predictions_did.py +++ b/doubleml/tests/test_external_predictions_did.py @@ -2,7 +2,7 @@ import pytest import math from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDID +from doubleml import DoubleMLData, DoubleMLDID, DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier @@ -49,8 +49,47 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): return res_dict + +@pytest.fixture(scope="module") +def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + np.random.seed(3141) + + dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") 
+ + kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLDIDCS.fit(store_predictions=True) + + ext_predictions["d"]["ml_g_d0_t0"] = DMLDIDCS.predictions["ml_g_d0_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d0_t1"] = DMLDIDCS.predictions["ml_g_d0_t1"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t0"] = DMLDIDCS.predictions["ml_g_d1_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t1"] = DMLDIDCS.predictions["ml_g_d1_t1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] + + DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLDIDCS_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLDIDCS.coef, "coef_ext": DMLDIDCS_ext.coef} + + return res_dict + + + @pytest.mark.ci def test_doubleml_did_coef(doubleml_did_fixture): assert math.isclose( doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 ) + +@pytest.mark.ci +def test_doubleml_didcs_coef(doubleml_didcs_fixture): + assert math.isclose( + doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 + ) \ No newline at end of file From cec948e061c01f22a414e6599fab7dbfdb1ab97a Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:13:26 -0800 Subject: [PATCH 042/134] update weights check --- doubleml/_utils_checks.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doubleml/_utils_checks.py b/doubleml/_utils_checks.py index 250ed394..c8c3cd48 100644 --- a/doubleml/_utils_checks.py +++ b/doubleml/_utils_checks.py @@ -237,6 +237,9 @@ def _check_weights(weights, score, n_obs, n_treat): f"weights of type {str(type(weights))} was passed.") if not np.all((0 <= weights) & 
(weights <= 1)): raise ValueError("All weights values must be between 0 and 1") - if len(weights.shape) != 2 or weights.shape[0] != n_treat or weights.shape[1] != n_obs: - raise ValueError(f"weights must have shape ({n_treat},{n_obs}). " + if len(weights.shape) != 1 or weights.shape[0] != n_obs: + raise ValueError(f"weights must have shape ({n_obs},). " f"weights of shape {weights.shape} was passed.") + if weights.sum() == 0: + raise ValueError(f"At least one weight must be non-zero.") + return \ No newline at end of file From c015c26a9315810fb44586fa9b51bd8be88c9eb6 Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:13:45 -0800 Subject: [PATCH 043/134] update weights implementation --- doubleml/double_ml_irm.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index c13cff10..3118afa5 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -168,9 +168,8 @@ def __init__(self, self._sensitivity_implemented = True _check_weights(weights, score, obj_dml_data.n_obs, obj_dml_data.n_treat) - if weights is None: - weights = np.ones((obj_dml_data.n_obs, obj_dml_data.n_treat)) - self._weights = weights + if weights is not None: + self._weights = weights @property def normalize_ipw(self): @@ -198,7 +197,7 @@ def weights(self): """ Specifies the weights for a weighted ATE. 
""" - return self._weights + return self._weights if hasattr(self,"_weights") else None def _initialize_ml_nuisance_params(self): valid_learner = ['ml_g0', 'ml_g1', 'ml_m'] From ce26165557e39f7572a5c7529e20656c2225e00c Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:13:58 -0800 Subject: [PATCH 044/134] add weights to linear score --- doubleml/double_ml_score_mixins.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml_score_mixins.py b/doubleml/double_ml_score_mixins.py index 809efbde..922d28fc 100644 --- a/doubleml/double_ml_score_mixins.py +++ b/doubleml/double_ml_score_mixins.py @@ -34,11 +34,17 @@ def _score_element_names(self): return ['psi_a', 'psi_b'] def _compute_score(self, psi_elements, coef): - psi = psi_elements['psi_a'] * coef + psi_elements['psi_b'] + if hasattr(self, "_weights"): + psi = self.weights * (psi_elements['psi_a'] * coef + psi_elements['psi_b']) + else: + psi = psi_elements['psi_a'] * coef + psi_elements['psi_b'] return psi def _compute_score_deriv(self, psi_elements, coef): - return psi_elements['psi_a'] + if hasattr(self, "_weights"): + return self.weights * psi_elements['psi_a'] + else: + return psi_elements['psi_a'] def _est_coef(self, psi_elements, smpls=None, scaling_factor=None, inds=None): psi_a = psi_elements['psi_a'] @@ -50,7 +56,10 @@ def _est_coef(self, psi_elements, smpls=None, scaling_factor=None, inds=None): # check whether we have cluster data and dml2 is_dml2_and_cluster = self._is_cluster_data and (self.dml_procedure == 'dml2') if not is_dml2_and_cluster: - coef = - np.mean(psi_b) / np.mean(psi_a) + if hasattr(self, "_weights"): + coef = - np.average(psi_b/psi_a, weights = self.weights) + else: + coef = - np.mean(psi_b) / np.mean(psi_a) # for cluster and dml2 we need the smpls and the scaling factors else: assert smpls is not None From 39a6cdabfc0e95b2f2b6bda86f488abcae921268 Mon Sep 17 00:00:00 2001 From: 
Jan Teichert-Kluge Date: Thu, 16 Nov 2023 13:26:48 +0100 Subject: [PATCH 045/134] dummy_learners are now "cloneable" --- doubleml/utils/dummy_learners.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 6043413f..2f893fb2 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -1,5 +1,6 @@ from sklearn.base import BaseEstimator + class dummy_regressor(BaseEstimator): _estimator_type = "regressor" @@ -12,9 +13,6 @@ def predict(*args): def set_params(*args): raise AttributeError("Accessed set_params method of dummy_regressor!") - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method of dummy_regressor!") - class dummy_classifier(BaseEstimator): _estimator_type = "classifier" @@ -28,8 +26,5 @@ def predict(*args): def set_params(*args): raise AttributeError("Accessed set_params method of dummy_classifier!") - def get_params(*args, **kwargs): - raise AttributeError("Accessed get_params method of dummy_classifier!") - def predict_proba(*args, **kwargs): raise AttributeError("Accessed predict_proba method of dummy_classifier!") From 40413e1bab936f515c78825e98b92ffefe3430a5 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 16 Nov 2023 13:27:12 +0100 Subject: [PATCH 046/134] Unit Tests for new dummy leaerner classes --- doubleml/tests/test_dummy_learners.py | 46 +++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 doubleml/tests/test_dummy_learners.py diff --git a/doubleml/tests/test_dummy_learners.py b/doubleml/tests/test_dummy_learners.py new file mode 100644 index 00000000..ee3d979a --- /dev/null +++ b/doubleml/tests/test_dummy_learners.py @@ -0,0 +1,46 @@ +import pytest +import numpy as np +from doubleml.utils import dummy_regressor, dummy_classifier +from sklearn.base import clone + + +@pytest.fixture(scope="module") +def dl_fixture(): + fixture = { + "dummy_regressor": 
dummy_regressor(), + "dummy_classifier": dummy_classifier(), + "X": np.random.normal(0, 1, size=(100, 10)), + "y_con": np.random.normal(0, 1, size=(100, 1)), + "y_cat": np.random.binomial(1, 0.5, size=(100, 1)), + } + + return fixture + + +@pytest.mark.ci +def test_fit(dl_fixture): + msg = "Accessed fit method of dummy_regressor!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_regressor"].fit(dl_fixture["X"], dl_fixture["y_con"]) + msg = "Accessed fit method of dummy_classifier!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_classifier"].fit(dl_fixture["X"], dl_fixture["y_cat"]) + + +@pytest.mark.ci +def test_predict(dl_fixture): + msg = "Accessed predict method of dummy_regressor!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_regressor"].predict(dl_fixture["X"]) + msg = "Accessed predict method of dummy_classifier!" + with pytest.raises(AttributeError, match=msg): + dl_fixture["dummy_classifier"].predict(dl_fixture["X"]) + + +@pytest.mark.ci +def test_clone(dl_fixture): + try: + _ = clone(dl_fixture["dummy_regressor"]) + _ = clone(dl_fixture["dummy_classifier"]) + except Error as e: + pytest.fail(f"clone() raised an exception:\n{str(e)}\n") From 59800124cac4ddc43f88a3343e81ec9139d14f16 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 16 Nov 2023 13:35:40 +0100 Subject: [PATCH 047/134] formatting --- .../tests/test_external_predictions_did.py | 29 ++++--------------- 1 file changed, 6 insertions(+), 23 deletions(-) diff --git a/doubleml/tests/test_external_predictions_did.py b/doubleml/tests/test_external_predictions_did.py index 308d9e8b..27495c6c 100644 --- a/doubleml/tests/test_external_predictions_did.py +++ b/doubleml/tests/test_external_predictions_did.py @@ -7,10 +7,11 @@ from doubleml.utils import dummy_regressor, dummy_classifier -@pytest.fixture(scope="module", params=['observational', 'experimental']) +@pytest.fixture(scope="module", params=["observational", "experimental"]) 
def did_score(request): return request.param + @pytest.fixture(scope="module", params=["dml1", "dml2"]) def dml_procedure(request): return request.param @@ -24,16 +25,10 @@ def n_rep(request): @pytest.fixture(scope="module") def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} - - np.random.seed(3141) - dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) np.random.seed(3141) - DMLDID.fit(store_predictions=True) ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] @@ -41,10 +36,9 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - np.random.seed(3141) DMLDID_ext.fit(external_predictions=ext_predictions) - + res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} return res_dict @@ -53,16 +47,10 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): @pytest.fixture(scope="module") def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} - - np.random.seed(3141) - dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) np.random.seed(3141) - DMLDIDCS.fit(store_predictions=True) ext_predictions["d"]["ml_g_d0_t0"] = DMLDIDCS.predictions["ml_g_d0_t0"][:, :, 0] @@ -72,7 +60,6 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), 
**kwargs) - np.random.seed(3141) DMLDIDCS_ext.fit(external_predictions=ext_predictions) @@ -81,15 +68,11 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): return res_dict - @pytest.mark.ci def test_doubleml_did_coef(doubleml_did_fixture): - assert math.isclose( - doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 - ) + assert math.isclose(doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) + @pytest.mark.ci def test_doubleml_didcs_coef(doubleml_didcs_fixture): - assert math.isclose( - doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3 - ) \ No newline at end of file + assert math.isclose(doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) From d3109b65fdc6d6632623da8c198f0e417798e786 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 28 Nov 2023 12:43:08 +0100 Subject: [PATCH 048/134] seperate testfiles for unit tests for ext. preds. 
--- .../tests/test_did_external_predictions.py | 49 +++++++++++++++ ....py => test_didcs_external_predictions.py} | 29 +-------- .../tests/test_irm_external_predictions.py | 63 +++++++++++++++++++ ...V.py => test_pliv_external_predictions.py} | 4 +- ...ns.py => test_plr_external_predictions.py} | 51 ++------------- 5 files changed, 118 insertions(+), 78 deletions(-) create mode 100644 doubleml/tests/test_did_external_predictions.py rename doubleml/tests/{test_external_predictions_did.py => test_didcs_external_predictions.py} (61%) create mode 100644 doubleml/tests/test_irm_external_predictions.py rename doubleml/tests/{test_external_predictions_IV.py => test_pliv_external_predictions.py} (97%) rename doubleml/tests/{test_external_predictions.py => test_plr_external_predictions.py} (50%) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py new file mode 100644 index 00000000..ebf8b616 --- /dev/null +++ b/doubleml/tests/test_did_external_predictions.py @@ -0,0 +1,49 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLData, DoubleMLDID +from doubleml.datasets import make_did_SZ2020 +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["observational", "experimental"]) +def did_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_did_fixture(did_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") + kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + DMLDID = 
DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + DMLDID.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] + + DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + np.random.seed(3141) + DMLDID_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_did_coef(doubleml_did_fixture): + assert math.isclose(doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) diff --git a/doubleml/tests/test_external_predictions_did.py b/doubleml/tests/test_didcs_external_predictions.py similarity index 61% rename from doubleml/tests/test_external_predictions_did.py rename to doubleml/tests/test_didcs_external_predictions.py index 27495c6c..6effc805 100644 --- a/doubleml/tests/test_external_predictions_did.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -2,7 +2,7 @@ import pytest import math from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDID, DoubleMLDIDCS +from doubleml import DoubleMLData, DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier @@ -22,28 +22,6 @@ def n_rep(request): return request.param -@pytest.fixture(scope="module") -def doubleml_did_fixture(did_score, dml_procedure, n_rep): - ext_predictions = {"d": {}} - dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - 
np.random.seed(3141) - DMLDID.fit(store_predictions=True) - - ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] - - DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - np.random.seed(3141) - DMLDID_ext.fit(external_predictions=ext_predictions) - - res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} - - return res_dict - - @pytest.fixture(scope="module") def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} @@ -68,11 +46,6 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): return res_dict -@pytest.mark.ci -def test_doubleml_did_coef(doubleml_did_fixture): - assert math.isclose(doubleml_did_fixture["coef_normal"], doubleml_did_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) - - @pytest.mark.ci def test_doubleml_didcs_coef(doubleml_didcs_fixture): assert math.isclose(doubleml_didcs_fixture["coef_normal"], doubleml_didcs_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-3) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py new file mode 100644 index 00000000..19c96330 --- /dev/null +++ b/doubleml/tests/test_irm_external_predictions.py @@ -0,0 +1,63 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLIRM, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["ATE", "ATTE"]) +def irm_score(request): + return request.param + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + 
+@pytest.fixture(scope="module") +def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): + ext_predictions = {"d": {}} + + x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") + + np.random.seed(3141) + + dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) + + kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + + DMLIRM = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + np.random.seed(3141) + + DMLIRM.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + + DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLIRM_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": DMLIRM_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_plr_coef(doubleml_plr_fixture): + assert math.isclose(doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def test_doubleml_irm_coef(doubleml_irm_fixture): + assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/tests/test_external_predictions_IV.py b/doubleml/tests/test_pliv_external_predictions.py similarity index 97% rename from doubleml/tests/test_external_predictions_IV.py rename to doubleml/tests/test_pliv_external_predictions.py index 5563ef90..cbd13dfe 100644 --- a/doubleml/tests/test_external_predictions_IV.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -31,9 +31,7 @@ def dim_z(request): def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type score if dim_z > 
1 and score == "IV-type": - res_dict = {"coef_normal": 1, "coef_ext": 1} - - return res_dict + pytest.skip("IV-type score only allows dim_z = 1") else: ext_predictions = {"d": {}} diff --git a/doubleml/tests/test_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py similarity index 50% rename from doubleml/tests/test_external_predictions.py rename to doubleml/tests/test_plr_external_predictions.py index f6409d40..ca04794f 100644 --- a/doubleml/tests/test_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -2,19 +2,15 @@ import pytest import math from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLData -from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml import DoubleMLPLR, DoubleMLData +from doubleml.datasets import make_plr_CCDDHNR2018 +from doubleml.utils import dummy_regressor @pytest.fixture(scope="module", params=["IV-type", "partialling out"]) def plr_score(request): return request.param -@pytest.fixture(scope="module", params=["ATE", "ATTE"]) -def irm_score(request): - return request.param - @pytest.fixture(scope="module", params=["dml1", "dml2"]) def dml_procedure(request): @@ -63,45 +59,6 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): return res_dict -@pytest.fixture(scope="module") -def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): - ext_predictions = {"d": {}} - - x, y, d = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") - - np.random.seed(3141) - - dml_data = DoubleMLData.from_arrays(x=x, y=y, d=d) - - kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - - DMLIRM = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - np.random.seed(3141) - - DMLIRM.fit(store_predictions=True) - - ext_predictions["d"]["ml_g0"] = 
DMLIRM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] - - DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - - np.random.seed(3141) - DMLIRM_ext.fit(external_predictions=ext_predictions) - - res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": DMLIRM_ext.coef} - - return res_dict - - @pytest.mark.ci def test_doubleml_plr_coef(doubleml_plr_fixture): - assert math.isclose( - doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 - ) - -@pytest.mark.ci -def test_doubleml_irm_coef(doubleml_irm_fixture): - assert math.isclose( - doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4 - ) + assert math.isclose(doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From efa436d873512e7c9e93b94b18e5fc28ea0f4c71 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 28 Nov 2023 14:34:59 +0100 Subject: [PATCH 049/134] add external preds for iivm models --- doubleml/double_ml_iivm.py | 96 ++++++++++++------- .../tests/test_iivm_external_predictions.py | 75 +++++++++++++++ .../tests/test_irm_external_predictions.py | 6 -- 3 files changed, 137 insertions(+), 40 deletions(-) create mode 100644 doubleml/tests/test_iivm_external_predictions.py diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 78429ae9..1cb793b2 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -258,13 +258,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa smpls_z0, smpls_z1 = _get_cond_smpls(smpls, z) # nuisance g - g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) - 
_check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat0['targets'] = g_hat0['targets'].astype(float) - g_hat0['targets'][z == 1] = np.nan + if external_predictions['ml_g0'] is not None: + g_hat0 = {'preds': external_predictions['ml_g0'], + 'targets': None, + 'models': None} + else: + g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) + # adjust target values to consider only compatible subsamples + g_hat0['targets'] = g_hat0['targets'].astype(float) + g_hat0['targets'][z == 1] = np.nan if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat0['preds']) == 'binary') @@ -276,14 +281,18 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'probabilities and not labels are predicted.') _check_is_propensity(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) - - g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) - _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) - # adjust target values to consider only compatible subsamples - g_hat1['targets'] = g_hat1['targets'].astype(float) - g_hat1['targets'][z == 0] = np.nan + if external_predictions['ml_g1'] is not None: + g_hat1 = {'preds': external_predictions['ml_g1'], + 'targets': None, + 'models': None} + else: + g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) + _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 
'ml_g', smpls) + # adjust target values to consider only compatible subsamples + g_hat1['targets'] = g_hat1['targets'].astype(float) + g_hat1['targets'][z == 0] = np.nan if self._dml_data.binary_outcome: binary_preds = (type_of_target(g_hat1['preds']) == 'binary') @@ -297,34 +306,53 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa _check_is_propensity(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls, eps=1e-12) # nuisance m - m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], - return_models=return_models) - _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) - _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) + if external_predictions['ml_m'] is not None: + m_hat = {'preds': external_predictions['ml_m'], + 'targets': None, + 'models': None} + else: + m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], + return_models=return_models) + _check_finite_predictions(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls) + _check_is_propensity(m_hat['preds'], self._learner['ml_m'], 'ml_m', smpls, eps=1e-12) # nuisance r + r0 = external_predictions['ml_r0'] is not None if self.subgroups['always_takers']: - r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], - return_models=return_models) + if r0: + r_hat0 = {'preds': external_predictions['ml_r0'], + 'targets': None, + 'models': None} + else: + r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat0 = {'preds': np.zeros_like(d), 'targets': 
np.zeros_like(d), 'models': None} - _check_finite_predictions(r_hat0['preds'], self._learner['ml_r'], 'ml_r', smpls) - # adjust target values to consider only compatible subsamples - r_hat0['targets'] = r_hat0['targets'].astype(float) - r_hat0['targets'][z == 1] = np.nan + if not r0: + _check_finite_predictions(r_hat0['preds'], self._learner['ml_r'], 'ml_r', smpls) + # adjust target values to consider only compatible subsamples + r_hat0['targets'] = r_hat0['targets'].astype(float) + r_hat0['targets'][z == 1] = np.nan + r1 = external_predictions['ml_r1'] is not None if self.subgroups['never_takers']: - r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], - return_models=return_models) + if r1: + r_hat1 = {'preds': external_predictions['ml_r1'], + 'targets': None, + 'models': None} + else: + r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv, + est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat1 = {'preds': np.ones_like(d), 'targets': np.ones_like(d), 'models': None} - _check_finite_predictions(r_hat1['preds'], self._learner['ml_r'], 'ml_r', smpls) - # adjust target values to consider only compatible subsamples - r_hat1['targets'] = r_hat1['targets'].astype(float) - r_hat1['targets'][z == 0] = np.nan + if not r1: + _check_finite_predictions(r_hat1['preds'], self._learner['ml_r'], 'ml_r', smpls) + # adjust target values to consider only compatible subsamples + r_hat1['targets'] = r_hat1['targets'].astype(float) + r_hat1['targets'][z == 0] = np.nan psi_a, psi_b = self._score_elements(y, z, d, g_hat0['preds'], g_hat1['preds'], m_hat['preds'], diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py new file mode 100644 index 00000000..40bb02db --- /dev/null +++ 
b/doubleml/tests/test_iivm_external_predictions.py @@ -0,0 +1,75 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from doubleml import DoubleMLIIVM, DoubleMLData +from doubleml.datasets import make_iivm_data +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module") +def adapted_doubleml_fixture(dml_procedure, n_rep): + ext_predictions = {"d": {}} + + data = make_iivm_data( + n_obs=500, dim_x=20, theta=0.5, alpha_x=1.0, return_type="DataFrame" + ) + + np.random.seed(3141) + + dml_data = DoubleMLData(data, "y", "d", z_cols="z") + + kwargs = { + "obj_dml_data": dml_data, + "score": "LATE", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + } + + DMLIIVM = DoubleMLIIVM( + ml_g=LinearRegression(), + ml_m=LogisticRegression(), + ml_r=LogisticRegression(), + **kwargs, + ) + np.random.seed(3141) + + DMLIIVM.fit(store_predictions=True) + + ext_predictions["d"]["ml_g0"] = DMLIIVM.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLIIVM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLIIVM.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_r0"] = DMLIIVM.predictions["ml_r0"][:, :, 0] + ext_predictions["d"]["ml_r1"] = DMLIIVM.predictions["ml_r1"][:, :, 0] + + + DMLIIVM_ext = DoubleMLIIVM( + ml_g=dummy_regressor(), ml_m=dummy_classifier(), ml_r=dummy_classifier(), **kwargs + ) + + np.random.seed(3141) + DMLIIVM_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLIIVM.coef, "coef_ext": DMLIIVM_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_adapted_doubleml_coef(adapted_doubleml_fixture): + assert math.isclose( + adapted_doubleml_fixture["coef_normal"], + 
adapted_doubleml_fixture["coef_ext"], + rel_tol=1e-9, + abs_tol=1e-4, + ) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index 19c96330..c1463a07 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -52,12 +52,6 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): return res_dict - -@pytest.mark.ci -def test_doubleml_plr_coef(doubleml_plr_fixture): - assert math.isclose(doubleml_plr_fixture["coef_normal"], doubleml_plr_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) - - @pytest.mark.ci def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 4e3f36fdf89a5619d55af8e01f11e10f052f0252 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Tue, 28 Nov 2023 17:39:40 +0100 Subject: [PATCH 050/134] add external preds for pq models --- doubleml/double_ml_pq.py | 140 ++++++++++-------- .../tests/test_pq_external_predictions.py | 64 ++++++++ 2 files changed, 144 insertions(+), 60 deletions(-) create mode 100644 doubleml/tests/test_pq_external_predictions.py diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index 76e49f1e..d785429f 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -261,79 +261,95 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - + + g = external_predictions['ml_g'] is not None + m = external_predictions['ml_m'] is not None + # initialize nuisance predictions, targets and models - g_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) - } - m_hat = copy.deepcopy(g_hat) - - ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) - # initialize 
models - fitted_models = {} - for learner in self.params_names: - # set nuisance model parameters - est_params = self._get_params(learner) - if est_params is not None: - fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) - for i_fold in range(self.n_folds)] - else: - fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + + if not (g and m): + g_hat = {'models': None, + 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) + } + m_hat = copy.deepcopy(g_hat) + ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) + # initialize models + fitted_models = {} + for learner in self.params_names: + # set nuisance model parameters + est_params = self._get_params(learner) + if est_params is not None: + fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) + for i_fold in range(self.n_folds)] + else: + fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + elif (g and not m) or (m and not g): + raise ValueError('External predictions for both g and m are required.') + else: + g_hat = {'models': None, + 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + 'preds': external_predictions['ml_g'] + } + m_hat = {'models': None, + 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + 'preds': external_predictions['ml_m'] + } # caculate nuisance functions over different folds - for i_fold in range(self.n_folds): - train_inds = smpls[i_fold][0] - test_inds = smpls[i_fold][1] + if not (g and m): + for i_fold in range(self.n_folds): + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] - # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, - random_state=42, stratify=d[train_inds]) - smpls_prelim = [(train, test) for train, test in - 
StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])] + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, + random_state=42, stratify=d[train_inds]) + smpls_prelim = [(train, test) for train, test in + StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])] - d_train_1 = d[train_inds_1] - y_train_1 = y[train_inds_1] - x_train_1 = x[train_inds_1, :] + d_train_1 = d[train_inds_1] + y_train_1 = y[train_inds_1] + x_train_1 = x[train_inds_1, :] - # get a copy of ml_m as a preliminary learner - ml_m_prelim = clone(fitted_models['ml_m'][i_fold]) - m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] + # get a copy of ml_m as a preliminary learner + ml_m_prelim = clone(fitted_models['ml_m'][i_fold]) + m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, + method='predict_proba', smpls=smpls_prelim)['preds'] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) - if self._normalize_ipw: - m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) - if self.treatment == 0: - m_hat_prelim = 1 - m_hat_prelim + if self._normalize_ipw: + m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) + if self.treatment == 0: + m_hat_prelim = 1 - m_hat_prelim - # preliminary ipw estimate - def ipw_score(theta): - res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_hat_prelim)) - return res + # preliminary ipw estimate + def ipw_score(theta): + res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_hat_prelim)) + return res - _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) - ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) - ipw_vec[i_fold] = ipw_est + _, bracket_guess = _get_bracket_guess(ipw_score, 
self._coef_start_val, self._coef_bounds) + ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) + ipw_vec[i_fold] = ipw_est - # use the preliminary estimates to fit the nuisance parameters on train_2 - d_train_2 = d[train_inds_2] - y_train_2 = y[train_inds_2] - x_train_2 = x[train_inds_2, :] + # use the preliminary estimates to fit the nuisance parameters on train_2 + d_train_2 = d[train_inds_2] + y_train_2 = y[train_inds_2] + x_train_2 = x[train_inds_2, :] - dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] - y_treat_train_2 = y_train_2[d_train_2 == self.treatment] + dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] + y_treat_train_2 = y_train_2[d_train_2 == self.treatment] - fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) + fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) - # predict nuisance values on the test data and the corresponding targets - g_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g'][i_fold], x[test_inds, :]) - g_hat['targets'][test_inds] = y[test_inds] <= ipw_est + # predict nuisance values on the test data and the corresponding targets + g_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g'][i_fold], x[test_inds, :]) + g_hat['targets'][test_inds] = y[test_inds] <= ipw_est - # refit the propensity score on the whole training set - fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds]) - m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x[test_inds, :]) + # refit the propensity score on the whole training set + fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds]) + m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x[test_inds, :]) # set target for propensity score m_hat['targets'] = d @@ -348,6 +364,7 @@ def ipw_score(theta): # clip propensities and normalize ipw weights # this is not done 
in the score to save computation due to multiple score evaluations # to be able to evaluate the raw models the m_hat['preds'] are not changed + #if not (g and m): m_hat_adj = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: if self.dml_procedure == 'dml1': @@ -358,9 +375,12 @@ def ipw_score(theta): if self.treatment == 0: m_hat_adj = 1 - m_hat_adj - # readjust start value for minimization - self._coef_start_val = np.mean(ipw_vec) + if not (g and m): + self._coef_start_val = np.mean(ipw_vec) + #else: + # m_hat_adj = m_hat['preds'] + psi_elements = {'ind_d': d == self.treatment, 'g': g_hat['preds'], 'm': m_hat_adj, 'y': y} diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py new file mode 100644 index 00000000..c21bd1c8 --- /dev/null +++ b/doubleml/tests/test_pq_external_predictions.py @@ -0,0 +1,64 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LogisticRegression +from doubleml import DoubleMLPQ, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): + ext_predictions = {"d": {}} + np.random.seed(3141) + data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") + + dml_data = DoubleMLData(data, "y", "d") + + kwargs = { + "obj_dml_data": dml_data, + "score": "PQ", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "normalize_ipw": normalize_ipw, + } + + ml_g = LogisticRegression() + ml_m = LogisticRegression() + + DMLPQ = 
DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + np.random.seed(3141) + + DMLPQ.fit(store_predictions=True) + + ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] + + DMLPLQ_ext = DoubleMLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + + np.random.seed(3141) + DMLPLQ_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_pq_coef(doubleml_pq_fixture): + assert math.isclose(doubleml_pq_fixture["coef_normal"], doubleml_pq_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 580d5d29025bf3c0d678db7ccb61ca705d06d9ca Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 28 Nov 2023 18:25:52 +0100 Subject: [PATCH 051/134] Update test_did_external_predictions.py --- doubleml/tests/test_did_external_predictions.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index ebf8b616..12d7e3c9 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -5,7 +5,7 @@ from doubleml import DoubleMLData, DoubleMLDID from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier - +from ._utils import draw_smpls @pytest.fixture(scope="module", params=["observational", "experimental"]) def did_score(request): @@ -26,8 +26,16 @@ def n_rep(request): def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} dml_data = make_did_SZ2020(n_obs=500, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + kwargs = { + 
"obj_dml_data": dml_data, + "score": did_score, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + DMLDID.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDID.fit(store_predictions=True) @@ -36,6 +44,7 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + DMLDID_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDID_ext.fit(external_predictions=ext_predictions) From 1957ce5f828e5c3893a417aaf1857f7efd33a5a1 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 28 Nov 2023 19:03:21 +0100 Subject: [PATCH 052/134] add basic cate version --- doubleml/double_ml_plr.py | 62 ++++++++++++++++++++++ doubleml/tests/test_doubleml_exceptions.py | 32 +++++++++++ doubleml/tests/test_plr.py | 22 ++++++++ 3 files changed, 116 insertions(+) diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index d75cbcf8..c7586f48 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -8,6 +8,7 @@ from .double_ml import DoubleML from .double_ml_data import DoubleMLData from .double_ml_score_mixins import LinearScoreMixin +from .double_ml_blp import DoubleMLBLP from ._utils import _dml_cv_predict, _dml_tune from ._utils_checks import _check_score, _check_finite_predictions, _check_is_propensity @@ -327,3 +328,64 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ 'tune_res': tune_res} return res + + def cate(self, basis): + """ + Calculate conditional average treatment effects (CATE) for a given basis. + + Parameters + ---------- + basis : :class:`pandas.DataFrame` + The basis for estimating the best linear predictor. 
Has to have the shape ``(n_obs, d)``, + where ``n_obs`` is the number of observations and ``d`` is the number of predictors. + + Returns + ------- + model : :class:`doubleML.DoubleMLBLP` + Best linear Predictor model. + """ + if self._dml_data.n_treat > 1: + raise NotImplementedError('Only implemented for one treatment. ' + + f'Number of treatments is {str(self._dml_data.n_treat)}.') + if self.n_rep != 1: + raise NotImplementedError('Only implemented for one repetition. ' + + f'Number of repetitions is {str(self.n_rep)}.') + + Y_tilde, D_tilde = self._partial_out() + + D_basis = basis * D_tilde + model = DoubleMLBLP( + orth_signal=Y_tilde.reshape(-1), + basis=D_basis, + is_gate=False, + ) + model.fit() + return model + + def _partial_out(self): + """ + Helper function. Returns the partialled out quantities of Y and D. + Works with multiple repetitions. + + Returns + ------- + Y_tilde : :class:`numpy.ndarray` + The residual of the regression of Y on X. + D_tilde : :class:`numpy.ndarray` + The residual of the regression of D on X. + """ + if self.predictions is None: + raise ValueError('predictions are None. 
Call .fit(store_predictions=True) to store the predictions.') + + ml_m = self.predictions["ml_m"].squeeze(axis=2) + if self.score == "partialling out": + ml_l = self.predictions["ml_l"].squeeze(axis=2) + Y_tilde = self._dml_data.y - ml_l + D_tilde = self._dml_data.d - ml_m + else: + assert self.score == "IV-type" + ml_g = self.predictions["ml_g"].squeeze(axis=2) + Y_tilde = self._dml_data.y - (self.coef * ml_m) - ml_g + D_tilde = self._dml_data.d - ml_m + + return Y_tilde, D_tilde diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index bb5a75bd..a9e22faf 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -1326,6 +1326,38 @@ def test_doubleml_exception_cate(): dml_irm_obj.cate(basis=2) +@pytest.mark.ci +def test_doubleml_exception_plr_cate(): + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2, + n_rep=2) + dml_plr_obj.fit() + msg = 'Only implemented for one repetition. Number of repetitions is 2.' + with pytest.raises(NotImplementedError, match=msg): + dml_plr_obj.cate(basis=2) + + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2) + dml_plr_obj.fit(store_predictions=False) + msg = r'predictions are None. Call .fit\(store_predictions=True\) to store the predictions.' + with pytest.raises(ValueError, match=msg): + dml_plr_obj.cate(basis=2) + + dml_data_multiple_treat = DoubleMLData(dml_data.data, y_col="y", d_cols=['d', 'X1']) + dml_plr_obj_multiple = DoubleMLPLR(dml_data_multiple_treat, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2) + dml_plr_obj_multiple.fit() + msg = 'Only implemented for one treatment. Number of treatments is 2.' 
+ with pytest.raises(NotImplementedError, match=msg): + dml_plr_obj_multiple.cate(basis=2) + + @pytest.mark.ci def test_double_ml_exception_evaluate_learner(): dml_irm_obj = DoubleMLIRM(dml_data_irm, diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index a27ee4a2..6501adf4 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -282,3 +282,25 @@ def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture): assert np.allclose(dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap], dml_plr_ols_manual_fixture['boot_t_stat' + bootstrap + '_manual'], rtol=1e-9, atol=1e-4) + + +@pytest.mark.ci +def test_dml_plr_cate_gate(): + n = 9 + + # collect data + np.random.seed(42) + obj_dml_data = dml.datasets.make_plr_CCDDHNR2018(n_obs=n) + ml_l = LinearRegression() + ml_g = LinearRegression() + ml_m = LinearRegression() + + dml_irm_obj = dml.DoubleMLPLR(obj_dml_data, + ml_g, ml_m, ml_l, + n_folds=2, + score='IV-type', + dml_procedure='dml2') + dml_irm_obj.fit() + random_basis = np.random.normal(size=(n, 5)) + cate = dml_irm_obj.cate(random_basis) + assert isinstance(cate, dml.DoubleMLBLP) From b34b37bb0fbbdac19130b9d0a569029290e6ede0 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:49:23 +0100 Subject: [PATCH 053/134] fix documentation of gate --- doubleml/double_ml_irm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 4bbe42f7..6ca09dd6 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -441,7 +441,7 @@ def gate(self, groups): Returns ------- - model : :class:`doubleML.DoubleMLBLPGATE` + model : :class:`doubleML.DoubleMLBLP` Best linear Predictor model for Group Effects. 
""" valid_score = ['ATE'] From 339314b5a2d71dcd1cca089d16cea883f750e79c Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 29 Nov 2023 10:50:19 +0100 Subject: [PATCH 054/134] fix store_predictions docstring --- doubleml/double_ml.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 082c6f3c..4537f010 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -498,7 +498,7 @@ def fit(self, n_jobs_cv=None, store_predictions=True, store_models=False): store_predictions : bool Indicates whether the predictions for the nuisance functions should be stored in ``predictions``. - Default is ``False``. + Default is ``True``. store_models : bool Indicates whether the fitted models for the nuisance functions should be stored in ``models``. This allows From d5788e7a13344eca31bbb29f243276fb57ad1110 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 29 Nov 2023 11:43:08 +0100 Subject: [PATCH 055/134] implement cate and gate with basic unit tests --- doubleml/double_ml_plr.py | 61 ++++++++++++++++++++-- doubleml/tests/test_doubleml_exceptions.py | 51 +++++++++++++++++- doubleml/tests/test_plr.py | 32 ++++++++++-- 3 files changed, 132 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index c7586f48..e82fbaf2 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target from sklearn.base import clone @@ -345,7 +346,7 @@ def cate(self, basis): Best linear Predictor model. """ if self._dml_data.n_treat > 1: - raise NotImplementedError('Only implemented for one treatment. ' + + raise NotImplementedError('Only implemented for single treatment. 
' + f'Number of treatments is {str(self._dml_data.n_treat)}.') if self.n_rep != 1: raise NotImplementedError('Only implemented for one repetition. ' + @@ -362,6 +363,53 @@ def cate(self, basis): model.fit() return model + def gate(self, groups): + """ + Calculate group average treatment effects (GATE) for mutually exclusive groups. + + Parameters + ---------- + groups : :class:`pandas.DataFrame` + The group indicator for estimating the best linear predictor. + Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations + and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str). + + Returns + ------- + model : :class:`doubleML.DoubleMLBLPGATE` + Best linear Predictor model for Group Effects. + """ + if self._dml_data.n_treat > 1: + raise NotImplementedError('Only implemented for single treatment. ' + + f'Number of treatments is {str(self._dml_data.n_treat)}.') + if self.n_rep != 1: + raise NotImplementedError('Only implemented for one repetition. ' + + f'Number of repetitions is {str(self.n_rep)}.') + + if not isinstance(groups, pd.DataFrame): + raise TypeError('Groups must be of DataFrame type. ' + f'Groups of type {str(type(groups))} was passed.') + if not all(groups.dtypes == bool) or all(groups.dtypes == int): + if groups.shape[1] == 1: + groups = pd.get_dummies(groups, prefix='Group', prefix_sep='_') + else: + raise TypeError('Columns of groups must be of bool type or int type (dummy coded). ' + 'Alternatively, groups should only contain one column.') + + if any(groups.sum(0) <= 5): + warnings.warn('At least one group effect is estimated with less than 6 observations.') + Y_tilde, D_tilde = self._partial_out() + + D_basis = groups * D_tilde + # fit the best linear predictor for GATE (different confint() method) + model = DoubleMLBLP( + orth_signal=Y_tilde.reshape(-1), + basis=D_basis, + is_gate=True, + ) + model.fit() + return model + def _partial_out(self): """ Helper function. 
Returns the partialled out quantities of Y and D. @@ -377,15 +425,18 @@ def _partial_out(self): if self.predictions is None: raise ValueError('predictions are None. Call .fit(store_predictions=True) to store the predictions.') + y = self._dml_data.y.reshape(-1, 1) + d = self._dml_data.d.reshape(-1, 1) ml_m = self.predictions["ml_m"].squeeze(axis=2) + if self.score == "partialling out": ml_l = self.predictions["ml_l"].squeeze(axis=2) - Y_tilde = self._dml_data.y - ml_l - D_tilde = self._dml_data.d - ml_m + Y_tilde = y - ml_l + D_tilde = d - ml_m else: assert self.score == "IV-type" ml_g = self.predictions["ml_g"].squeeze(axis=2) - Y_tilde = self._dml_data.y - (self.coef * ml_m) - ml_g - D_tilde = self._dml_data.d - ml_m + Y_tilde = y - (self.coef * ml_m) - ml_g + D_tilde = d - ml_m return Y_tilde, D_tilde diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index a9e22faf..c9dd9e21 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -2,7 +2,7 @@ import pandas as pd import numpy as np -from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData,\ +from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, \ DoubleMLClusterData, DoubleMLPQ, DoubleMLLPQ, DoubleMLCVAR, DoubleMLQTE, DoubleMLDID, DoubleMLDIDCS from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \ make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020 @@ -1353,11 +1353,58 @@ def test_doubleml_exception_plr_cate(): ml_m=Lasso(), n_folds=2) dml_plr_obj_multiple.fit() - msg = 'Only implemented for one treatment. Number of treatments is 2.' + msg = 'Only implemented for single treatment. Number of treatments is 2.' 
with pytest.raises(NotImplementedError, match=msg): dml_plr_obj_multiple.cate(basis=2) +@pytest.mark.ci +def test_doubleml_exception_plr_gate(): + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2, + n_rep=2) + dml_plr_obj.fit() + + msg = 'Only implemented for one repetition. Number of repetitions is 2.' + with pytest.raises(NotImplementedError, match=msg): + dml_plr_obj.gate(groups=2) + + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2, + n_rep=1) + dml_plr_obj.fit() + msg = "Groups must be of DataFrame type. Groups of type was passed." + with pytest.raises(TypeError, match=msg): + dml_plr_obj.gate(groups=2) + msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). ' + 'Alternatively, groups should only contain one column.') + with pytest.raises(TypeError, match=msg): + dml_plr_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data.n_obs, 3)))) + dml_plr_obj = DoubleMLPLR(dml_data, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2, + n_rep=1) + dml_plr_obj.fit(store_predictions=False) + msg = r'predictions are None. Call .fit\(store_predictions=True\) to store the predictions.' + with pytest.raises(ValueError, match=msg): + dml_plr_obj.gate(groups=pd.DataFrame(np.random.choice([True, False], (dml_data.n_obs, 2)))) + + dml_data_multiple_treat = DoubleMLData(dml_data.data, y_col="y", d_cols=['d', 'X1']) + dml_plr_obj_multiple = DoubleMLPLR(dml_data_multiple_treat, + ml_l=Lasso(), + ml_m=Lasso(), + n_folds=2) + dml_plr_obj_multiple.fit() + msg = 'Only implemented for single treatment. Number of treatments is 2.' 
+ with pytest.raises(NotImplementedError, match=msg): + dml_plr_obj_multiple.gate(groups=pd.DataFrame(np.random.choice([True, False], (dml_data.n_obs, 2)))) + + @pytest.mark.ci def test_double_ml_exception_evaluate_learner(): dml_irm_obj = DoubleMLIRM(dml_data_irm, diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index 6501adf4..29f69ff5 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -1,7 +1,8 @@ -import numpy as np import pytest import math import scipy +import numpy as np +import pandas as pd from sklearn.base import clone @@ -295,12 +296,33 @@ def test_dml_plr_cate_gate(): ml_g = LinearRegression() ml_m = LinearRegression() - dml_irm_obj = dml.DoubleMLPLR(obj_dml_data, + dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m, ml_l, n_folds=2, score='IV-type', dml_procedure='dml2') - dml_irm_obj.fit() - random_basis = np.random.normal(size=(n, 5)) - cate = dml_irm_obj.cate(random_basis) + dml_plr_obj.fit() + random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5))) + cate = dml_plr_obj.cate(random_basis) assert isinstance(cate, dml.DoubleMLBLP) + assert isinstance(cate.confint(), pd.DataFrame) + + groups_1 = pd.DataFrame( + np.column_stack([obj_dml_data.data['X1'] <= 0, + obj_dml_data.data['X1'] > 0.2]), + columns=['Group 1', 'Group 2']) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gate_1 = dml_plr_obj.gate(groups_1) + assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP) + assert isinstance(gate_1.confint(), pd.DataFrame) + assert all(gate_1.confint().index == groups_1.columns) + + np.random.seed(42) + groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) + msg = ('At least one group effect is estimated with less than 6 observations.') + with pytest.warns(UserWarning, match=msg): + gate_2 = dml_plr_obj.gate(groups_2) + assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) + assert isinstance(gate_2.confint(), 
pd.DataFrame) + assert all(gate_2.confint().index == ["Group_1", "Group_2"]) From 062e879d7854b58acbe385fe9251599619b317ef Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 29 Nov 2023 12:05:38 +0100 Subject: [PATCH 056/134] rebase gate on cate method --- doubleml/double_ml_irm.py | 24 ++++--------- doubleml/double_ml_plr.py | 25 ++++---------- doubleml/tests/test_doubleml_exceptions.py | 40 +++------------------- 3 files changed, 19 insertions(+), 70 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 6ca09dd6..6a33c42e 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -397,7 +397,7 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ return res - def cate(self, basis): + def cate(self, basis, is_gate=False): """ Calculate conditional average treatment effects (CATE) for a given basis. @@ -406,6 +406,9 @@ def cate(self, basis): basis : :class:`pandas.DataFrame` The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``, where ``n_obs`` is the number of observations and ``d`` is the number of predictors. + is_gate : bool + Indicates whether the basis is constructed for GATEs (dummy-basis). + Default is ``False``. Returns ------- @@ -424,8 +427,8 @@ def cate(self, basis): # define the orthogonal signal orth_signal = self.psi_elements['psi_b'].reshape(-1) # fit the best linear predictor - model = DoubleMLBLP(orth_signal, basis=basis).fit() - + model = DoubleMLBLP(orth_signal, basis=basis, is_gate=is_gate) + model.fit() return model def gate(self, groups): @@ -444,15 +447,6 @@ def gate(self, groups): model : :class:`doubleML.DoubleMLBLP` Best linear Predictor model for Group Effects. """ - valid_score = ['ATE'] - if self.score not in valid_score: - raise ValueError('Invalid score ' + self.score + '. 
' + - 'Valid score ' + ' or '.join(valid_score) + '.') - - if self.n_rep != 1: - raise NotImplementedError('Only implemented for one repetition. ' + - f'Number of repetitions is {str(self.n_rep)}.') - if not isinstance(groups, pd.DataFrame): raise TypeError('Groups must be of DataFrame type. ' f'Groups of type {str(type(groups))} was passed.') @@ -467,11 +461,7 @@ def gate(self, groups): if any(groups.sum(0) <= 5): warnings.warn('At least one group effect is estimated with less than 6 observations.') - # define the orthogonal signal - orth_signal = self.psi_elements['psi_b'].reshape(-1) - # fit the best linear predictor for GATE (different confint() method) - model = DoubleMLBLP(orth_signal, basis=groups, is_gate=True).fit() - + model = self.cate(groups, is_gate=True) return model def policy_tree(self, features, depth=2, **tree_params): diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index e82fbaf2..acd98f26 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -330,7 +330,7 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ return res - def cate(self, basis): + def cate(self, basis, is_gate=False): """ Calculate conditional average treatment effects (CATE) for a given basis. @@ -339,6 +339,9 @@ def cate(self, basis): basis : :class:`pandas.DataFrame` The basis for estimating the best linear predictor. Has to have the shape ``(n_obs, d)``, where ``n_obs`` is the number of observations and ``d`` is the number of predictors. + is_gate : bool + Indicates whether the basis is constructed for GATEs (dummy-basis). + Default is ``False``. 
Returns ------- @@ -358,7 +361,7 @@ def cate(self, basis): model = DoubleMLBLP( orth_signal=Y_tilde.reshape(-1), basis=D_basis, - is_gate=False, + is_gate=is_gate, ) model.fit() return model @@ -376,15 +379,9 @@ def gate(self, groups): Returns ------- - model : :class:`doubleML.DoubleMLBLPGATE` + model : :class:`doubleML.DoubleMLBLP` Best linear Predictor model for Group Effects. """ - if self._dml_data.n_treat > 1: - raise NotImplementedError('Only implemented for single treatment. ' + - f'Number of treatments is {str(self._dml_data.n_treat)}.') - if self.n_rep != 1: - raise NotImplementedError('Only implemented for one repetition. ' + - f'Number of repetitions is {str(self.n_rep)}.') if not isinstance(groups, pd.DataFrame): raise TypeError('Groups must be of DataFrame type. ' @@ -398,16 +395,8 @@ def gate(self, groups): if any(groups.sum(0) <= 5): warnings.warn('At least one group effect is estimated with less than 6 observations.') - Y_tilde, D_tilde = self._partial_out() - D_basis = groups * D_tilde - # fit the best linear predictor for GATE (different confint() method) - model = DoubleMLBLP( - orth_signal=Y_tilde.reshape(-1), - basis=D_basis, - is_gate=True, - ) - model.fit() + model = self.cate(groups, is_gate=True) return model def _partial_out(self): diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index c9dd9e21..b4b6125d 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -1268,10 +1268,11 @@ def test_doubleml_exception_gate(): msg = "Groups must be of DataFrame type. Groups of type was passed." with pytest.raises(TypeError, match=msg): dml_irm_obj.gate(groups=2) + groups = pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3))) msg = (r'Columns of groups must be of bool type or int type \(dummy coded\). 
' 'Alternatively, groups should only contain one column.') with pytest.raises(TypeError, match=msg): - dml_irm_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data_irm.n_obs, 3)))) + dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), @@ -1280,10 +1281,10 @@ def test_doubleml_exception_gate(): n_folds=5, score='ATTE') dml_irm_obj.fit() - + groups = pd.DataFrame(np.random.choice([True, False], size=dml_data_irm.n_obs)) msg = 'Invalid score ATTE. Valid score ATE.' with pytest.raises(ValueError, match=msg): - dml_irm_obj.gate(groups=2) + dml_irm_obj.gate(groups=groups) dml_irm_obj = DoubleMLIRM(dml_data_irm, ml_g=Lasso(), @@ -1296,7 +1297,7 @@ def test_doubleml_exception_gate(): msg = 'Only implemented for one repetition. Number of repetitions is 2.' with pytest.raises(NotImplementedError, match=msg): - dml_irm_obj.gate(groups=2) + dml_irm_obj.gate(groups=groups) @pytest.mark.ci @@ -1360,17 +1361,6 @@ def test_doubleml_exception_plr_cate(): @pytest.mark.ci def test_doubleml_exception_plr_gate(): - dml_plr_obj = DoubleMLPLR(dml_data, - ml_l=Lasso(), - ml_m=Lasso(), - n_folds=2, - n_rep=2) - dml_plr_obj.fit() - - msg = 'Only implemented for one repetition. Number of repetitions is 2.' - with pytest.raises(NotImplementedError, match=msg): - dml_plr_obj.gate(groups=2) - dml_plr_obj = DoubleMLPLR(dml_data, ml_l=Lasso(), ml_m=Lasso(), @@ -1384,26 +1374,6 @@ def test_doubleml_exception_plr_gate(): 'Alternatively, groups should only contain one column.') with pytest.raises(TypeError, match=msg): dml_plr_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data.n_obs, 3)))) - dml_plr_obj = DoubleMLPLR(dml_data, - ml_l=Lasso(), - ml_m=Lasso(), - n_folds=2, - n_rep=1) - dml_plr_obj.fit(store_predictions=False) - msg = r'predictions are None. Call .fit\(store_predictions=True\) to store the predictions.' 
- with pytest.raises(ValueError, match=msg): - dml_plr_obj.gate(groups=pd.DataFrame(np.random.choice([True, False], (dml_data.n_obs, 2)))) - - dml_data_multiple_treat = DoubleMLData(dml_data.data, y_col="y", d_cols=['d', 'X1']) - dml_plr_obj_multiple = DoubleMLPLR(dml_data_multiple_treat, - ml_l=Lasso(), - ml_m=Lasso(), - n_folds=2) - dml_plr_obj_multiple.fit() - msg = 'Only implemented for single treatment. Number of treatments is 2.' - with pytest.raises(NotImplementedError, match=msg): - dml_plr_obj_multiple.gate(groups=pd.DataFrame(np.random.choice([True, False], (dml_data.n_obs, 2)))) - @pytest.mark.ci def test_double_ml_exception_evaluate_learner(): From 016c845f7ac745c9fddfd27ab8c706117a0e4e14 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 29 Nov 2023 12:08:08 +0100 Subject: [PATCH 057/134] extend gate plr tests --- doubleml/tests/test_plr.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index 29f69ff5..fd184be9 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -286,7 +286,7 @@ def test_dml_plr_ols_manual_boot(dml_plr_ols_manual_fixture): @pytest.mark.ci -def test_dml_plr_cate_gate(): +def test_dml_plr_cate_gate(score, dml_procedure): n = 9 # collect data @@ -299,8 +299,8 @@ def test_dml_plr_cate_gate(): dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m, ml_l, n_folds=2, - score='IV-type', - dml_procedure='dml2') + score=score, + dml_procedure=dml_procedure) dml_plr_obj.fit() random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 5))) cate = dml_plr_obj.cate(random_basis) From fe13dee4e743b0c8564ee2e54cc7704ac9179a20 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 29 Nov 2023 14:54:39 +0100 Subject: [PATCH 058/134] fix unit test for ext. preds. 
for DID CS model --- doubleml/tests/test_didcs_external_predictions.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 6effc805..0eed900a 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -5,6 +5,7 @@ from doubleml import DoubleMLData, DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier +from ._utils import draw_smpls @pytest.fixture(scope="module", params=["observational", "experimental"]) @@ -26,8 +27,17 @@ def n_rep(request): def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions = {"d": {}} dml_data = make_did_SZ2020(n_obs=500, cross_sectional_data=True, return_type="DoubleMLData") - kwargs = {"obj_dml_data": dml_data, "score": did_score, "n_rep": n_rep, "dml_procedure": dml_procedure} + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + kwargs = { + "obj_dml_data": dml_data, + "score": did_score, + "n_rep": n_rep, + "n_folds": 5, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + DMLDIDCS.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDIDCS.fit(store_predictions=True) @@ -38,6 +48,7 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + DMLDIDCS_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLDIDCS_ext.fit(external_predictions=ext_predictions) From 73e87b12c22c5ad04ac78cd136627758f4373f30 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 29 Nov 2023 14:55:05 +0100 Subject: [PATCH 059/134] fix unit test for ext. preds. 
for PQ model --- doubleml/tests/test_pq_external_predictions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index c21bd1c8..4468db83 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -5,6 +5,7 @@ from doubleml import DoubleMLPQ, DoubleMLData from doubleml.datasets import make_irm_data from doubleml.utils import dummy_regressor, dummy_classifier +from ._utils import draw_smpls @pytest.fixture(scope="module", params=["dml1", "dml2"]) @@ -29,6 +30,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d") + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) kwargs = { "obj_dml_data": dml_data, @@ -36,12 +38,14 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): "n_rep": n_rep, "dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, + "draw_sample_splitting": False } ml_g = LogisticRegression() ml_m = LogisticRegression() DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + DMLPQ.set_sample_splitting(all_smpls) np.random.seed(3141) DMLPQ.fit(store_predictions=True) @@ -50,6 +54,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] DMLPLQ_ext = DoubleMLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + DMLPLQ_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLPLQ_ext.fit(external_predictions=ext_predictions) From 6434846eca24f6264d494bf5cdbe178fa6275fa1 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 29 Nov 2023 16:52:02 +0100 Subject: [PATCH 060/134] fix format --- doubleml/tests/test_doubleml_exceptions.py | 1 + 1 file changed, 1 insertion(+) diff --git 
a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index b4b6125d..c7b61d12 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -1375,6 +1375,7 @@ def test_doubleml_exception_plr_gate(): with pytest.raises(TypeError, match=msg): dml_plr_obj.gate(groups=pd.DataFrame(np.random.normal(0, 1, size=(dml_data.n_obs, 3)))) + @pytest.mark.ci def test_double_ml_exception_evaluate_learner(): dml_irm_obj = DoubleMLIRM(dml_data_irm, From 4ec3e480e99501b2aec7af648810c52936db9458 Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Thu, 30 Nov 2023 13:07:34 +0100 Subject: [PATCH 061/134] Revert "add weights to linear score" This reverts commit ce26165557e39f7572a5c7529e20656c2225e00c. --- doubleml/double_ml_score_mixins.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml_score_mixins.py b/doubleml/double_ml_score_mixins.py index 922d28fc..809efbde 100644 --- a/doubleml/double_ml_score_mixins.py +++ b/doubleml/double_ml_score_mixins.py @@ -34,17 +34,11 @@ def _score_element_names(self): return ['psi_a', 'psi_b'] def _compute_score(self, psi_elements, coef): - if hasattr(self, "_weights"): - psi = self.weights * (psi_elements['psi_a'] * coef + psi_elements['psi_b']) - else: - psi = psi_elements['psi_a'] * coef + psi_elements['psi_b'] + psi = psi_elements['psi_a'] * coef + psi_elements['psi_b'] return psi def _compute_score_deriv(self, psi_elements, coef): - if hasattr(self, "_weights"): - return self.weights * psi_elements['psi_a'] - else: - return psi_elements['psi_a'] + return psi_elements['psi_a'] def _est_coef(self, psi_elements, smpls=None, scaling_factor=None, inds=None): psi_a = psi_elements['psi_a'] @@ -56,10 +50,7 @@ def _est_coef(self, psi_elements, smpls=None, scaling_factor=None, inds=None): # check whether we have cluster data and dml2 is_dml2_and_cluster = self._is_cluster_data and 
(self.dml_procedure == 'dml2') if not is_dml2_and_cluster: - if hasattr(self, "_weights"): - coef = - np.average(psi_b/psi_a, weights = self.weights) - else: - coef = - np.mean(psi_b) / np.mean(psi_a) + coef = - np.mean(psi_b) / np.mean(psi_a) # for cluster and dml2 we need the smpls and the scaling factors else: assert smpls is not None From b2f09589b193ebe07603bfee2c7a9dbad55697ae Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 30 Nov 2023 14:34:43 +0100 Subject: [PATCH 062/134] add ext. preds. for LPQ model (only for DML2) --- doubleml/double_ml_lpq.py | 713 ++++++++++-------- .../tests/test_lpq_external_predictions.py | 74 ++ 2 files changed, 492 insertions(+), 295 deletions(-) create mode 100644 doubleml/tests/test_lpq_external_predictions.py diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 30e4e730..a3255b6f 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -9,11 +9,19 @@ from .double_ml_score_mixins import NonLinearScoreMixin from .double_ml_data import DoubleMLData -from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _cond_targets, \ - _get_bracket_guess, _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score +from ._utils import ( + _dml_cv_predict, + _trimm, + _predict_zero_one_propensity, + _cond_targets, + _get_bracket_guess, + _default_kde, + _normalize_ipw, + _dml_tune, + _solve_ipw_score, +) from ._utils_resampling import DoubleMLResampling -from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, \ - _check_quantile +from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, _check_quantile class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): @@ -100,29 +108,25 @@ class DoubleMLLPQ(NonLinearScoreMixin, DoubleML): d 0.217244 0.636453 0.341336 0.73285 -1.03018 1.464668 """ - def __init__(self, - obj_dml_data, - ml_g, - ml_m, - treatment=1, - quantile=0.5, - n_folds=5, - n_rep=1, - 
score='LPQ', - dml_procedure='dml2', - normalize_ipw=True, - kde=None, - trimming_rule='truncate', - trimming_threshold=1e-2, - draw_sample_splitting=True, - apply_cross_fitting=True): - super().__init__(obj_dml_data, - n_folds, - n_rep, - score, - dml_procedure, - draw_sample_splitting, - apply_cross_fitting) + def __init__( + self, + obj_dml_data, + ml_g, + ml_m, + treatment=1, + quantile=0.5, + n_folds=5, + n_rep=1, + score="LPQ", + dml_procedure="dml2", + normalize_ipw=True, + kde=None, + trimming_rule="truncate", + trimming_threshold=1e-2, + draw_sample_splitting=True, + apply_cross_fitting=True, + ): + super().__init__(obj_dml_data, n_folds, n_rep, score, dml_procedure, draw_sample_splitting, apply_cross_fitting) self._quantile = quantile self._treatment = treatment @@ -130,21 +134,21 @@ def __init__(self, self._kde = _default_kde else: if not callable(kde): - raise TypeError('kde should be either a callable or None. ' - '%r was passed.' % kde) + raise TypeError("kde should be either a callable or None. " "%r was passed." % kde) self._kde = kde self._normalize_ipw = normalize_ipw self._check_data(self._dml_data) - valid_score = ['LPQ'] + valid_score = ["LPQ"] _check_score(self.score, valid_score, allow_callable=False) _check_quantile(self.quantile) _check_treatment(self.treatment) if not isinstance(self.normalize_ipw, bool): - raise TypeError('Normalization indicator has to be boolean. ' + - f'Object of type {str(type(self.normalize_ipw))} passed.') + raise TypeError( + "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." 
+ ) # initialize starting values and bounds self._coef_bounds = (self._dml_data.y.min(), self._dml_data.y.max()) @@ -155,24 +159,34 @@ def __init__(self, self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - _ = self._check_learner(ml_g, 'ml_g', regressor=False, classifier=True) - _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) - self._learner = {'ml_m_z': clone(ml_m), - 'ml_g_du_z0': clone(ml_g), 'ml_g_du_z1': clone(ml_g), - 'ml_m_d_z0': clone(ml_m), 'ml_m_d_z1': clone(ml_m)} - self._predict_method = {'ml_m_z': 'predict_proba', - 'ml_g_du_z0': 'predict_proba', 'ml_g_du_z1': 'predict_proba', - 'ml_m_d_z0': 'predict_proba', 'ml_m_d_z1': 'predict_proba'} + _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = { + "ml_m_z": clone(ml_m), + "ml_g_du_z0": clone(ml_g), + "ml_g_du_z1": clone(ml_g), + "ml_m_d_z0": clone(ml_m), + "ml_m_d_z1": clone(ml_m), + } + self._predict_method = { + "ml_m_z": "predict_proba", + "ml_g_du_z0": "predict_proba", + "ml_g_du_z1": "predict_proba", + "ml_m_d_z0": "predict_proba", + "ml_m_d_z1": "predict_proba", + } self._initialize_ml_nuisance_params() if draw_sample_splitting: strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.z.reshape(-1, 1) - obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - apply_cross_fitting=self.apply_cross_fitting, - stratify=strata) + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + apply_cross_fitting=self.apply_cross_fitting, + stratify=strata, + ) self._smpls = obj_dml_resampling.split_samples() @property @@ -219,33 +233,33 @@ def trimming_threshold(self): @property def _score_element_names(self): - return ['ind_d', 'm_z', 'g_du_z0', 'g_du_z1', 'y', 'z', 'comp_prob'] + return ["ind_d", 
"m_z", "g_du_z0", "g_du_z1", "y", "z", "comp_prob"] def _compute_ipw_score(self, theta, d, y, prop, z, comp_prob): sign = 2 * self.treatment - 1.0 weights = sign * (z / prop - (1 - z) / (1 - prop)) / comp_prob u = (d == self._treatment) * (y <= theta) - v = -1. * self.quantile + v = -1.0 * self.quantile score = weights * u + v return score def _compute_score(self, psi_elements, coef, inds=None): sign = 2 * self.treatment - 1.0 - ind_d = psi_elements['ind_d'] - m_z = psi_elements['m_z'] - g_du_z0 = psi_elements['g_du_z0'] - g_du_z1 = psi_elements['g_du_z1'] - y = psi_elements['y'] - z = psi_elements['z'] - comp_prob = psi_elements['comp_prob'] + ind_d = psi_elements["ind_d"] + m_z = psi_elements["m_z"] + g_du_z0 = psi_elements["g_du_z0"] + g_du_z1 = psi_elements["g_du_z1"] + y = psi_elements["y"] + z = psi_elements["z"] + comp_prob = psi_elements["comp_prob"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - m_z = psi_elements['m_z'] - g_du_z0 = psi_elements['g_du_z0'][inds] - g_du_z1 = psi_elements['g_du_z1'][inds] - y = psi_elements['y'][inds] - z = psi_elements['z'][inds] + ind_d = psi_elements["ind_d"][inds] + m_z = psi_elements["m_z"] + g_du_z0 = psi_elements["g_du_z0"][inds] + g_du_z1 = psi_elements["g_du_z1"][inds] + y = psi_elements["y"][inds] + z = psi_elements["z"][inds] score1 = g_du_z1 - g_du_z0 score2 = (z / m_z) * (ind_d * (y <= coef) - g_du_z1) @@ -255,17 +269,17 @@ def _compute_score(self, psi_elements, coef, inds=None): def _compute_score_deriv(self, psi_elements, coef, inds=None): sign = 2 * self.treatment - 1.0 - ind_d = psi_elements['ind_d'] - y = psi_elements['y'] - m_z = psi_elements['m_z'] - z = psi_elements['z'] - comp_prob = psi_elements['comp_prob'] + ind_d = psi_elements["ind_d"] + y = psi_elements["y"] + m_z = psi_elements["m_z"] + z = psi_elements["z"] + comp_prob = psi_elements["comp_prob"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - y = psi_elements['y'][inds] - m_z = psi_elements['m_z'][inds] - z = 
psi_elements['z'][inds] + ind_d = psi_elements["ind_d"][inds] + y = psi_elements["y"][inds] + m_z = psi_elements["m_z"][inds] + z = psi_elements["z"][inds] score_weights = sign * ((z / m_z) - (1 - z) / (1 - m_z)) * ind_d / comp_prob u = (y - coef).reshape(-1, 1) @@ -274,178 +288,225 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): return deriv def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} - for learner in ['ml_m_z', 'ml_g_du_z0', 'ml_g_du_z1', - 'ml_m_d_z0', 'ml_m_d_z1']} + self._params = { + learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} + for learner in ["ml_m_z", "ml_g_du_z0", "ml_g_du_z1", "ml_m_d_z0", "ml_m_d_z1"] + } def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) + + m_z = external_predictions["ml_m_z"] is not None + m_d_d0 = external_predictions["ml_m_d_z0"] is not None + m_d_d1 = external_predictions["ml_m_d_z1"] is not None + g_du_z0 = external_predictions["ml_g_du_z0"] is not None + g_du_z1 = external_predictions["ml_g_du_z1"] is not None + ext_preds = [m_z, m_d_d0, m_d_d1, g_du_z0, g_du_z1] # create strata for splitting strata = self._dml_data.d.reshape(-1, 1) + 2 * self._dml_data.z.reshape(-1, 1) # initialize nuisance predictions, targets and models - m_z_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) - } - m_d_z0_hat = copy.deepcopy(m_z_hat) - m_d_z1_hat 
= copy.deepcopy(m_z_hat) - g_du_z0_hat = copy.deepcopy(m_z_hat) - g_du_z1_hat = copy.deepcopy(m_z_hat) - - # initialize models - fitted_models = {} - for learner in self.params_names: - # set nuisance model parameters - est_params = self._get_params(learner) - if est_params is not None: - fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) - for i_fold in range(self.n_folds)] - else: - fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + if not all(ext_preds): + m_z_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + m_d_z0_hat = copy.deepcopy(m_z_hat) + m_d_z1_hat = copy.deepcopy(m_z_hat) + g_du_z0_hat = copy.deepcopy(m_z_hat) + g_du_z1_hat = copy.deepcopy(m_z_hat) + + # initialize models + fitted_models = {} + for learner in self.params_names: + # set nuisance model parameters + est_params = self._get_params(learner) + if est_params is not None: + fitted_models[learner] = [ + clone(self._learner[learner]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds) + ] + else: + fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] + ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) + elif any(ext_preds) and not any(ext_preds): + raise ValueError("External predictions for all estimations or for none are required.") + else: + m_z_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m_z"], + } + m_d_z0_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m_d_z0"], + } + m_d_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m_d_z1"], + } + g_du_z0_hat = { + "models": None, + "targets": 
np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_g_du_z0"], + } + g_du_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_g_du_z1"], + } - ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) # calculate nuisance functions over different folds - for i_fold in range(self.n_folds): - train_inds = smpls[i_fold][0] - test_inds = smpls[i_fold][1] - - # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, - random_state=42, stratify=strata[train_inds]) - smpls_prelim = [(train, test) for train, test in - StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=strata[train_inds_1])] - - d_train_1 = d[train_inds_1] - y_train_1 = y[train_inds_1] - x_train_1 = x[train_inds_1, :] - z_train_1 = z[train_inds_1] - - # preliminary propensity for z - ml_m_z_prelim = clone(fitted_models['ml_m_z'][i_fold]) - m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] - - m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) - if self._normalize_ipw: - m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) - - # propensity for d == 1 cond. on z == 0 (training set 1) - z0_train_1 = z_train_1 == 0 - x_z0_train_1 = x_train_1[z0_train_1, :] - d_z0_train_1 = d_train_1[z0_train_1] - ml_m_d_z0_prelim = clone(fitted_models['ml_m_d_z0'][i_fold]) - ml_m_d_z0_prelim.fit(x_z0_train_1, d_z0_train_1) - m_d_z0_hat_prelim = _predict_zero_one_propensity(ml_m_d_z0_prelim, x_train_1) - - # propensity for d == 1 cond. 
on z == 1 (training set 1) - z1_train_1 = z_train_1 == 1 - x_z1_train_1 = x_train_1[z1_train_1, :] - d_z1_train_1 = d_train_1[z1_train_1] - ml_m_d_z1_prelim = clone(fitted_models['ml_m_d_z1'][i_fold]) - ml_m_d_z1_prelim.fit(x_z1_train_1, d_z1_train_1) - m_d_z1_hat_prelim = _predict_zero_one_propensity(ml_m_d_z1_prelim, x_train_1) - - # preliminary estimate of theta_2_aux - comp_prob_prelim = np.mean(m_d_z1_hat_prelim - m_d_z0_hat_prelim - + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim) - - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim)) - - # preliminary ipw estimate - def ipw_score(theta): - res = np.mean(self._compute_ipw_score(theta, d_train_1, y_train_1, m_z_hat_prelim, - z_train_1, comp_prob_prelim)) - return res - - _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) - ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) - ipw_vec[i_fold] = ipw_est - - # use the preliminary estimates to fit the nuisance parameters on train_2 - d_train_2 = d[train_inds_2] - y_train_2 = y[train_inds_2] - x_train_2 = x[train_inds_2, :] - z_train_2 = z[train_inds_2] - - # define test observations - d_test = d[test_inds] - y_test = y[test_inds] - x_test = x[test_inds, :] - z_test = z[test_inds] - - # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. on z == 0 - z0_train_2 = z_train_2 == 0 - x_z0_train_2 = x_train_2[z0_train_2, :] - du_z0_train_2 = (d_train_2[z0_train_2] == self._treatment) * (y_train_2[z0_train_2] <= ipw_est) - fitted_models['ml_g_du_z0'][i_fold].fit(x_z0_train_2, du_z0_train_2) - g_du_z0_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g_du_z0'][i_fold], x_test) - - # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. 
on z == 1 - z1_train_2 = z_train_2 == 1 - x_z1_train_2 = x_train_2[z1_train_2, :] - du_z1_train_2 = (d_train_2[z1_train_2] == self._treatment) * (y_train_2[z1_train_2] <= ipw_est) - fitted_models['ml_g_du_z1'][i_fold].fit(x_z1_train_2, du_z1_train_2) - g_du_z1_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g_du_z1'][i_fold], x_test) - - # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - test_inds_z0 = test_inds[z_test == 0] - test_inds_z1 = test_inds[z_test == 1] - g_du_z0_hat['targets'][test_inds_z0] = (1.0 * (d_test[z_test == 0] == self._treatment) * - (y_test[z_test == 0] <= ipw_est)) - g_du_z1_hat['targets'][test_inds_z1] = (1.0 * (d_test[z_test == 1] == self._treatment) * - (y_test[z_test == 1] <= ipw_est)) - - # refit nuisance elements for the local potential quantile - z_train = z[train_inds] - x_train = x[train_inds] - d_train = d[train_inds] - - # refit propensity for z (whole training set) - fitted_models['ml_m_z'][i_fold].fit(x_train, z_train) - m_z_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_z'][i_fold], x_test) - - # refit propensity for d == 1 cond. on z == 0 (whole training set) - z0_train = z_train == 0 - x_z0_train = x_train[z0_train, :] - d_z0_train = d_train[z0_train] - fitted_models['ml_m_d_z0'][i_fold].fit(x_z0_train, d_z0_train) - m_d_z0_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_d_z0'][i_fold], x_test) - - # propensity for d == 1 cond. 
on z == 1 (whole training set) - x_z1_train = x_train[z_train == 1, :] - d_z1_train = d_train[z_train == 1] - fitted_models['ml_m_d_z1'][i_fold].fit(x_z1_train, d_z1_train) - m_d_z1_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m_d_z1'][i_fold], x_test) + if not all(ext_preds): + for i_fold in range(self.n_folds): + train_inds = smpls[i_fold][0] + test_inds = smpls[i_fold][1] + + # start nested crossfitting + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=strata[train_inds] + ) + smpls_prelim = [ + (train, test) + for train, test in StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=strata[train_inds_1]) + ] + + d_train_1 = d[train_inds_1] + y_train_1 = y[train_inds_1] + x_train_1 = x[train_inds_1, :] + z_train_1 = z[train_inds_1] + + # preliminary propensity for z + ml_m_z_prelim = clone(fitted_models["ml_m_z"][i_fold]) + m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim)[ + "preds" + ] + + m_z_hat_prelim = _trimm(m_z_hat_prelim, self.trimming_rule, self.trimming_threshold) + if self._normalize_ipw: + m_z_hat_prelim = _normalize_ipw(m_z_hat_prelim, z_train_1) + + # propensity for d == 1 cond. on z == 0 (training set 1) + z0_train_1 = z_train_1 == 0 + x_z0_train_1 = x_train_1[z0_train_1, :] + d_z0_train_1 = d_train_1[z0_train_1] + ml_m_d_z0_prelim = clone(fitted_models["ml_m_d_z0"][i_fold]) + ml_m_d_z0_prelim.fit(x_z0_train_1, d_z0_train_1) + m_d_z0_hat_prelim = _predict_zero_one_propensity(ml_m_d_z0_prelim, x_train_1) + + # propensity for d == 1 cond. 
on z == 1 (training set 1) + z1_train_1 = z_train_1 == 1 + x_z1_train_1 = x_train_1[z1_train_1, :] + d_z1_train_1 = d_train_1[z1_train_1] + ml_m_d_z1_prelim = clone(fitted_models["ml_m_d_z1"][i_fold]) + ml_m_d_z1_prelim.fit(x_z1_train_1, d_z1_train_1) + m_d_z1_hat_prelim = _predict_zero_one_propensity(ml_m_d_z1_prelim, x_train_1) + + # preliminary estimate of theta_2_aux + comp_prob_prelim = np.mean( + m_d_z1_hat_prelim + - m_d_z0_hat_prelim + + z_train_1 / m_z_hat_prelim * (d_train_1 - m_d_z1_hat_prelim) + - (1 - z_train_1) / (1 - m_z_hat_prelim) * (d_train_1 - m_d_z0_hat_prelim) + ) + + # preliminary ipw estimate + def ipw_score(theta): + res = np.mean( + self._compute_ipw_score(theta, d_train_1, y_train_1, m_z_hat_prelim, z_train_1, comp_prob_prelim) + ) + return res + + _, bracket_guess = _get_bracket_guess(ipw_score, self._coef_start_val, self._coef_bounds) + ipw_est = _solve_ipw_score(ipw_score=ipw_score, bracket_guess=bracket_guess) + ipw_vec[i_fold] = ipw_est + + # use the preliminary estimates to fit the nuisance parameters on train_2 + d_train_2 = d[train_inds_2] + y_train_2 = y[train_inds_2] + x_train_2 = x[train_inds_2, :] + z_train_2 = z[train_inds_2] + + # define test observations + d_test = d[test_inds] + y_test = y[test_inds] + x_test = x[test_inds, :] + z_test = z[test_inds] + + # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. on z == 0 + z0_train_2 = z_train_2 == 0 + x_z0_train_2 = x_train_2[z0_train_2, :] + du_z0_train_2 = (d_train_2[z0_train_2] == self._treatment) * (y_train_2[z0_train_2] <= ipw_est) + fitted_models["ml_g_du_z0"][i_fold].fit(x_z0_train_2, du_z0_train_2) + g_du_z0_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g_du_z0"][i_fold], x_test) + + # propensity for (D == treatment)*Ind(Y <= ipq_est) cond. 
on z == 1 + z1_train_2 = z_train_2 == 1 + x_z1_train_2 = x_train_2[z1_train_2, :] + du_z1_train_2 = (d_train_2[z1_train_2] == self._treatment) * (y_train_2[z1_train_2] <= ipw_est) + fitted_models["ml_g_du_z1"][i_fold].fit(x_z1_train_2, du_z1_train_2) + g_du_z1_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g_du_z1"][i_fold], x_test) + + # the predictions of both should only be evaluated conditional on z == 0 or z == 1 + test_inds_z0 = test_inds[z_test == 0] + test_inds_z1 = test_inds[z_test == 1] + g_du_z0_hat["targets"][test_inds_z0] = ( + 1.0 * (d_test[z_test == 0] == self._treatment) * (y_test[z_test == 0] <= ipw_est) + ) + g_du_z1_hat["targets"][test_inds_z1] = ( + 1.0 * (d_test[z_test == 1] == self._treatment) * (y_test[z_test == 1] <= ipw_est) + ) + + # refit nuisance elements for the local potential quantile + z_train = z[train_inds] + x_train = x[train_inds] + d_train = d[train_inds] + + # refit propensity for z (whole training set) + fitted_models["ml_m_z"][i_fold].fit(x_train, z_train) + m_z_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_z"][i_fold], x_test) + + # refit propensity for d == 1 cond. on z == 0 (whole training set) + z0_train = z_train == 0 + x_z0_train = x_train[z0_train, :] + d_z0_train = d_train[z0_train] + fitted_models["ml_m_d_z0"][i_fold].fit(x_z0_train, d_z0_train) + m_d_z0_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_d_z0"][i_fold], x_test) + + # propensity for d == 1 cond. 
on z == 1 (whole training set) + x_z1_train = x_train[z_train == 1, :] + d_z1_train = d_train[z_train == 1] + fitted_models["ml_m_d_z1"][i_fold].fit(x_z1_train, d_z1_train) + m_d_z1_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m_d_z1"][i_fold], x_test) # save targets and models - m_z_hat['targets'] = z + m_z_hat["targets"] = z # set targets to relevant subsample - g_du_z0_hat['targets'] = _cond_targets(g_du_z0_hat['targets'], cond_sample=(z == 0)) - g_du_z1_hat['targets'] = _cond_targets(g_du_z1_hat['targets'], cond_sample=(z == 1)) + g_du_z0_hat["targets"] = _cond_targets(g_du_z0_hat["targets"], cond_sample=(z == 0)) + g_du_z1_hat["targets"] = _cond_targets(g_du_z1_hat["targets"], cond_sample=(z == 1)) # the predictions of both should only be evaluated conditional on z == 0 or z == 1 - m_d_z0_hat['targets'] = _cond_targets(d, cond_sample=(z == 0)) - m_d_z0_hat['targets'] = _cond_targets(d, cond_sample=(z == 1)) + m_d_z0_hat["targets"] = _cond_targets(d, cond_sample=(z == 0)) + m_d_z0_hat["targets"] = _cond_targets(d, cond_sample=(z == 1)) if return_models: - m_z_hat['models'] = fitted_models['ml_m_z'] - m_d_z0_hat['models'] = fitted_models['ml_m_d_z0'] - m_d_z1_hat['models'] = fitted_models['ml_m_d_z1'] - g_du_z0_hat['models'] = fitted_models['ml_g_du_z0'] - g_du_z1_hat['models'] = fitted_models['ml_g_du_z1'] + m_z_hat["models"] = fitted_models["ml_m_z"] + m_d_z0_hat["models"] = fitted_models["ml_m_d_z0"] + m_d_z1_hat["models"] = fitted_models["ml_m_d_z1"] + g_du_z0_hat["models"] = fitted_models["ml_g_du_z0"] + g_du_z1_hat["models"] = fitted_models["ml_g_du_z1"] # clip propensities - m_z_hat_adj = _trimm(m_z_hat['preds'], self.trimming_rule, self.trimming_threshold) + m_z_hat_adj = _trimm(m_z_hat["preds"], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - if self.dml_procedure == 'dml1': + if self.dml_procedure == "dml1": for _, test_index in smpls: m_z_hat_adj[test_index] = _normalize_ipw(m_z_hat_adj[test_index], 
z[test_index]) else: @@ -453,49 +514,60 @@ def ipw_score(theta): # this could be adjusted to be compatible with dml1 # estimate final nuisance parameter - comp_prob_hat = np.mean(m_d_z1_hat['preds'] - m_d_z0_hat['preds'] - + z / m_z_hat_adj * (d - m_d_z1_hat['preds']) - - (1 - z) / (1 - m_z_hat_adj) * (d - m_d_z0_hat['preds'])) - - # readjust start value for minimization - self._coef_start_val = np.mean(ipw_vec) - - psi_elements = {'ind_d': d == self._treatment, 'm_z': m_z_hat_adj, - 'g_du_z0': g_du_z0_hat['preds'], 'g_du_z1': g_du_z1_hat['preds'], - 'y': y, 'z': z, 'comp_prob': comp_prob_hat} - preds = {'predictions': {'ml_m_z': m_z_hat['preds'], - 'ml_m_d_z0': m_d_z0_hat['preds'], - 'ml_m_d_z1': m_d_z1_hat['preds'], - 'ml_g_du_z0': g_du_z0_hat['preds'], - 'ml_g_du_z1': g_du_z1_hat['preds']}, - 'targets': {'ml_m_z': m_z_hat['targets'], - 'ml_m_d_z0': m_d_z0_hat['targets'], - 'ml_m_d_z1': m_d_z1_hat['targets'], - 'ml_g_du_z0': g_du_z0_hat['targets'], - 'ml_g_du_z1': g_du_z1_hat['targets']}, - 'models': {'ml_m_z': m_z_hat['models'], - 'ml_m_d_z0': m_d_z0_hat['models'], - 'ml_m_d_z1': m_d_z1_hat['models'], - 'ml_g_du_z0': g_du_z0_hat['models'], - 'ml_g_du_z1': g_du_z1_hat['models']} - } + comp_prob_hat = np.mean( + m_d_z1_hat["preds"] + - m_d_z0_hat["preds"] + + z / m_z_hat_adj * (d - m_d_z1_hat["preds"]) + - (1 - z) / (1 - m_z_hat_adj) * (d - m_d_z0_hat["preds"]) + ) + + if not all(ext_preds): + # readjust start value for minimization + self._coef_start_val = np.mean(ipw_vec) + + psi_elements = { + "ind_d": d == self._treatment, + "m_z": m_z_hat_adj, + "g_du_z0": g_du_z0_hat["preds"], + "g_du_z1": g_du_z1_hat["preds"], + "y": y, + "z": z, + "comp_prob": comp_prob_hat, + } + preds = { + "predictions": { + "ml_m_z": m_z_hat["preds"], + "ml_m_d_z0": m_d_z0_hat["preds"], + "ml_m_d_z1": m_d_z1_hat["preds"], + "ml_g_du_z0": g_du_z0_hat["preds"], + "ml_g_du_z1": g_du_z1_hat["preds"], + }, + "targets": { + "ml_m_z": m_z_hat["targets"], + "ml_m_d_z0": m_d_z0_hat["targets"], 
+ "ml_m_d_z1": m_d_z1_hat["targets"], + "ml_g_du_z0": g_du_z0_hat["targets"], + "ml_g_du_z1": g_du_z1_hat["targets"], + }, + "models": { + "ml_m_z": m_z_hat["models"], + "ml_m_d_z0": m_d_z0_hat["models"], + "ml_m_d_z1": m_d_z1_hat["models"], + "ml_g_du_z0": g_du_z0_hat["models"], + "ml_g_du_z1": g_du_z1_hat["models"], + }, + } return psi_elements, preds - def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, - search_mode, n_iter_randomized_search): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_m_z': None, - 'ml_m_d_z0': None, - 'ml_m_d_z1': None, - 'ml_g_du_z0': None, - 'ml_g_du_z1': None} + scoring_methods = {"ml_m_z": None, "ml_m_d_z0": None, "ml_m_d_z1": None, "ml_g_du_z0": None, "ml_g_du_z1": None} train_inds = [train_index for (train_index, _) in smpls] train_inds_z0 = [np.intersect1d(np.where(z == 0)[0], train) for train, _ in smpls] @@ -504,21 +576,66 @@ def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ approx_quant = np.quantile(y[d == self.treatment], self.quantile) du = (d == self.treatment) * (y <= approx_quant) - m_z_tune_res = _dml_tune(z, x, train_inds, - self._learner['ml_m_z'], param_grids['ml_m_z'], scoring_methods['ml_m_z'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - m_d_z0_tune_res = _dml_tune(d, x, train_inds_z0, - self._learner['ml_m_d_z0'], param_grids['ml_m_d_z0'], 
scoring_methods['ml_m_d_z0'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - m_d_z1_tune_res = _dml_tune(d, x, train_inds_z1, - self._learner['ml_m_d_z1'], param_grids['ml_m_d_z1'], scoring_methods['ml_m_d_z1'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_du_z0_tune_res = _dml_tune(du, x, train_inds_z0, - self._learner['ml_g_du_z0'], param_grids['ml_g_du_z0'], scoring_methods['ml_g_du_z0'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - g_du_z1_tune_res = _dml_tune(du, x, train_inds_z1, - self._learner['ml_g_du_z1'], param_grids['ml_g_du_z1'], scoring_methods['ml_g_du_z1'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + m_z_tune_res = _dml_tune( + z, + x, + train_inds, + self._learner["ml_m_z"], + param_grids["ml_m_z"], + scoring_methods["ml_m_z"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + m_d_z0_tune_res = _dml_tune( + d, + x, + train_inds_z0, + self._learner["ml_m_d_z0"], + param_grids["ml_m_d_z0"], + scoring_methods["ml_m_d_z0"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + m_d_z1_tune_res = _dml_tune( + d, + x, + train_inds_z1, + self._learner["ml_m_d_z1"], + param_grids["ml_m_d_z1"], + scoring_methods["ml_m_d_z1"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + g_du_z0_tune_res = _dml_tune( + du, + x, + train_inds_z0, + self._learner["ml_g_du_z0"], + param_grids["ml_g_du_z0"], + scoring_methods["ml_g_du_z0"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + g_du_z1_tune_res = _dml_tune( + du, + x, + train_inds_z1, + self._learner["ml_g_du_z1"], + param_grids["ml_g_du_z1"], + scoring_methods["ml_g_du_z1"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) m_z_best_params = [xx.best_params_ for xx in m_z_tune_res] m_d_z0_best_params = [xx.best_params_ for xx in m_d_z0_tune_res] @@ -526,34 +643,40 @@ def 
_nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_ g_du_z0_best_params = [xx.best_params_ for xx in g_du_z0_tune_res] g_du_z1_best_params = [xx.best_params_ for xx in g_du_z1_tune_res] - params = {'ml_m_z': m_z_best_params, - 'ml_m_d_z0': m_d_z0_best_params, - 'ml_m_d_z1': m_d_z1_best_params, - 'ml_g_du_z0': g_du_z0_best_params, - 'ml_g_du_z1': g_du_z1_best_params} - tune_res = {'ml_m_z': m_z_tune_res, - 'ml_m_d_z0': m_d_z0_tune_res, - 'ml_m_d_z1': m_d_z1_tune_res, - 'ml_g_du_z0': g_du_z0_tune_res, - 'ml_g_du_z1': g_du_z1_tune_res} - - res = {'params': params, - 'tune_res': tune_res} + params = { + "ml_m_z": m_z_best_params, + "ml_m_d_z0": m_d_z0_best_params, + "ml_m_d_z1": m_d_z1_best_params, + "ml_g_du_z0": g_du_z0_best_params, + "ml_g_du_z1": g_du_z1_best_params, + } + tune_res = { + "ml_m_z": m_z_tune_res, + "ml_m_d_z0": m_d_z0_tune_res, + "ml_m_d_z1": m_d_z1_tune_res, + "ml_g_du_z0": g_du_z0_tune_res, + "ml_g_du_z1": g_du_z1_tune_res, + } + + res = {"params": params, "tune_res": tune_res} return res def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + raise TypeError( + "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." + ) _check_zero_one_treatment(self) - one_instr = (obj_dml_data.n_instr == 1) - err_msg = ('Incompatible data. ' - 'To fit an LPQ model with DML ' - 'exactly one binary variable with values 0 and 1 ' - 'needs to be specified as instrumental variable.') + one_instr = obj_dml_data.n_instr == 1 + err_msg = ( + "Incompatible data. " + "To fit an LPQ model with DML " + "exactly one binary variable with values 0 and 1 " + "needs to be specified as instrumental variable." 
+ ) if one_instr: - binary_instr = (type_of_target(obj_dml_data.z) == 'binary') + binary_instr = type_of_target(obj_dml_data.z) == "binary" zero_one_instr = np.all((np.power(obj_dml_data.z, 2) - obj_dml_data.z) == 0) if not (one_instr & binary_instr & zero_one_instr): raise ValueError(err_msg) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py new file mode 100644 index 00000000..af30b879 --- /dev/null +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -0,0 +1,74 @@ +import numpy as np +import pytest +import math +from sklearn.linear_model import LogisticRegression +from doubleml import DoubleMLLPQ, DoubleMLData +from doubleml.datasets import make_iivm_data +from doubleml.utils import dummy_regressor, dummy_classifier +from ._utils import draw_smpls + + +@pytest.fixture(scope="module", params=["dml1", "dml2"]) +def dml_procedure(request): + return request.param + + +@pytest.fixture(scope="module", params=[1, 3]) +def n_rep(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope="module") +def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): + ext_predictions = {"d": {}} + np.random.seed(3141) + data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, return_type='DataFrame') + + dml_data = DoubleMLData(data, 'y', 'd', z_cols='z') + #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.z) + all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) + + kwargs = { + "obj_dml_data": dml_data, + "score": "LPQ", + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "normalize_ipw": normalize_ipw, + #"draw_sample_splitting": False + } + + ml_g = LogisticRegression() + ml_m = LogisticRegression() + + DMLLPQ = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + 
DMLLPQ.set_sample_splitting(all_smpls) + + np.random.seed(3141) + DMLLPQ.fit(store_predictions=True) + + ext_predictions["d"]["ml_m_z"] = DMLLPQ.predictions["ml_m_z"][:, :, 0] + ext_predictions["d"]["ml_m_d_z0"] = DMLLPQ.predictions["ml_m_d_z0"][:, :, 0] + ext_predictions["d"]["ml_m_d_z1"] = DMLLPQ.predictions["ml_m_d_z1"][:, :, 0] + ext_predictions["d"]["ml_g_du_z0"] = DMLLPQ.predictions["ml_g_du_z0"][:, :, 0] + ext_predictions["d"]["ml_g_du_z1"] = DMLLPQ.predictions["ml_g_du_z1"][:, :, 0] + + DMLLPLQ_ext = DoubleMLLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + DMLLPLQ_ext.set_sample_splitting(all_smpls) + + np.random.seed(3141) + DMLLPLQ_ext.fit(external_predictions=ext_predictions) + + res_dict = {"coef_normal": DMLLPQ.coef, "coef_ext": DMLLPLQ_ext.coef} + + return res_dict + + +@pytest.mark.ci +def test_doubleml_lpq_coef(doubleml_lpq_fixture): + assert math.isclose(doubleml_lpq_fixture["coef_normal"], doubleml_lpq_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 353ba38846a7bd95a74e03ab325eb0153aee1ff3 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 08:28:02 +0100 Subject: [PATCH 063/134] fix docstring --- doubleml/double_ml_blp.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py index 3bc44a73..8ecdf41a 100644 --- a/doubleml/double_ml_blp.py +++ b/doubleml/double_ml_blp.py @@ -133,11 +133,11 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500): basis : :class:`pandas.DataFrame` The basis for constructing the confidence interval. Has to have the same form as the basis from the construction. If ``None`` the basis for the construction of the model is used. - Default is ``None`` + Default is ``None``. joint : bool Indicates whether joint confidence intervals are computed. - Default is ``False`` + Default is ``False``. level : float The confidence level. 
From 464a3f6c7cd05d9ceb1cc1ef45f1cca8b96a7922 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Dec 2023 11:48:22 +0100 Subject: [PATCH 064/134] fix ext. preds. for LPQ model --- doubleml/tests/test_lpq_external_predictions.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index af30b879..36a838e5 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -27,11 +27,12 @@ def normalize_ipw(request): def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, return_type='DataFrame') + data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, alpha_x=1.0, return_type='DataFrame') dml_data = DoubleMLData(data, 'y', 'd', z_cols='z') #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.z) all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) + #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=[dml_data.d, dml_data.z]) #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) kwargs = { @@ -40,7 +41,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): "n_rep": n_rep, "dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, - #"draw_sample_splitting": False + "draw_sample_splitting": False } ml_g = LogisticRegression() From 2d9125c8c6fa706b0d9218247f4ddd234266f6c3 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 1 Dec 2023 11:51:31 +0100 Subject: [PATCH 065/134] optimize unit test for ext. 
preds in LPQ --- doubleml/tests/test_lpq_external_predictions.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index 36a838e5..db321b37 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -27,13 +27,10 @@ def normalize_ipw(request): def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, alpha_x=1.0, return_type='DataFrame') + data = make_iivm_data(theta=0.5, n_obs=500, dim_x=20, alpha_x=1.0, return_type="DataFrame") - dml_data = DoubleMLData(data, 'y', 'd', z_cols='z') - #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.z) + dml_data = DoubleMLData(data, "y", "d", z_cols="z") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) - #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=[dml_data.d, dml_data.z]) - #all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) kwargs = { "obj_dml_data": dml_data, @@ -41,7 +38,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): "n_rep": n_rep, "dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, - "draw_sample_splitting": False + "draw_sample_splitting": False, } ml_g = LogisticRegression() @@ -49,7 +46,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): DMLLPQ = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLLPQ.set_sample_splitting(all_smpls) - + np.random.seed(3141) DMLLPQ.fit(store_predictions=True) From 3a2c7d78b884f6386b78955f043f28cb3181099b Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 14:23:56 +0100 Subject: [PATCH 066/134] start to adapt blp to gate with overlapping groups --- doubleml/double_ml_blp.py | 21 +++++++++++++++++++-- 
doubleml/double_ml_irm.py | 3 +++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py index 8ecdf41a..c35c7ba3 100644 --- a/doubleml/double_ml_blp.py +++ b/doubleml/double_ml_blp.py @@ -1,6 +1,7 @@ import statsmodels.api as sm import numpy as np import pandas as pd +import warnings from scipy.stats import norm from scipy.linalg import sqrtm @@ -132,7 +133,8 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500): ---------- basis : :class:`pandas.DataFrame` The basis for constructing the confidence interval. Has to have the same form as the basis from - the construction. If ``None`` the basis for the construction of the model is used. + the construction. If ``None`` is passed, if the basis is constructed for GATEs, the GATEs are returned. + Else, the confidence intervals for the basis coefficients are returned (with pointwise cofidence intervals). Default is ``None``. joint : bool @@ -180,9 +182,24 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500): if self._is_gate: # reduce to unique groups basis = pd.DataFrame(np.diag(v=np.full((self._basis.shape[1]), True))) + # add intercept for ATE to groups + basis.insert(0, "ATE", [True] * basis.shape[0]) gate_names = list(self._basis.columns.values) else: - basis = self._basis + if joint: + warnings.warn('Returning pointwise confidence intervals for basis coefficients.') + # return the confidence intervals for the basis coefficients + ci = np.vstack(( + self.blp_model.conf_int(alpha=alpha/2)[0], + self.blp_model.params, + self.blp_model.conf_int(alpha=alpha/2)[1]) + ).T + df_ci = pd.DataFrame( + ci, + columns=['{:.1f} %'.format(alpha/2 * 100), 'effect', '{:.1f} %'.format((1-alpha/2) * 100)], + index=self._basis.columns) + return df_ci + elif not (basis.shape[1] == self._basis.shape[1]): raise ValueError('Invalid basis: DataFrame has to have the exact same number and ordering of columns.') elif not 
list(basis.columns.values) == list(self._basis.columns.values): diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 6a33c42e..99e68953 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -461,6 +461,9 @@ def gate(self, groups): if any(groups.sum(0) <= 5): warnings.warn('At least one group effect is estimated with less than 6 observations.') + # add intercept for ATE to groups + groups.insert(0, "ATE", [True] * groups.shape[0]) + model = self.cate(groups, is_gate=True) return model From bab4815e7e5316bac60c748aec47456c3b07aa80 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 15:07:24 +0100 Subject: [PATCH 067/134] fix gate and tests --- doubleml/double_ml_blp.py | 4 +--- doubleml/double_ml_irm.py | 9 ++++++--- doubleml/double_ml_plr.py | 10 ++++++++-- doubleml/tests/test_blp.py | 12 +++++++++--- doubleml/tests/test_doubleml_exceptions.py | 17 ++++++++++++++++- doubleml/tests/test_irm.py | 4 ++-- doubleml/tests/test_plr.py | 4 ++-- 7 files changed, 44 insertions(+), 16 deletions(-) diff --git a/doubleml/double_ml_blp.py b/doubleml/double_ml_blp.py index c35c7ba3..bfdf7671 100644 --- a/doubleml/double_ml_blp.py +++ b/doubleml/double_ml_blp.py @@ -182,12 +182,10 @@ def confint(self, basis=None, joint=False, level=0.95, n_rep_boot=500): if self._is_gate: # reduce to unique groups basis = pd.DataFrame(np.diag(v=np.full((self._basis.shape[1]), True))) - # add intercept for ATE to groups - basis.insert(0, "ATE", [True] * basis.shape[0]) gate_names = list(self._basis.columns.values) else: if joint: - warnings.warn('Returning pointwise confidence intervals for basis coefficients.') + warnings.warn('Returning pointwise confidence intervals for basis coefficients.', UserWarning) # return the confidence intervals for the basis coefficients ci = np.vstack(( self.blp_model.conf_int(alpha=alpha/2)[0], diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 
99e68953..14285658 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -433,7 +433,7 @@ def cate(self, basis, is_gate=False): def gate(self, groups): """ - Calculate group average treatment effects (GATE) for mutually exclusive groups. + Calculate group average treatment effects (GATE) for groups. Parameters ---------- @@ -462,9 +462,12 @@ def gate(self, groups): warnings.warn('At least one group effect is estimated with less than 6 observations.') # add intercept for ATE to groups - groups.insert(0, "ATE", [True] * groups.shape[0]) + basis = groups.copy(deep=True) + basis.insert(0, "ATE", [True] * groups.shape[0]) + # convert to float + basis = basis.astype(float) - model = self.cate(groups, is_gate=True) + model = self.cate(basis, is_gate=True) return model def policy_tree(self, features, depth=2, **tree_params): diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index acd98f26..75dbbd7d 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -368,7 +368,7 @@ def cate(self, basis, is_gate=False): def gate(self, groups): """ - Calculate group average treatment effects (GATE) for mutually exclusive groups. + Calculate group average treatment effects (GATE) for groups. 
Parameters ---------- @@ -396,7 +396,13 @@ def gate(self, groups): if any(groups.sum(0) <= 5): warnings.warn('At least one group effect is estimated with less than 6 observations.') - model = self.cate(groups, is_gate=True) + # add intercept for ATE to groups + basis = groups.copy(deep=True) + basis.insert(0, "ATE", [True] * groups.shape[0]) + # convert to float + basis = basis.astype(float) + + model = self.cate(basis, is_gate=True) return model def _partial_out(self): diff --git a/doubleml/tests/test_blp.py b/doubleml/tests/test_blp.py index e20b0722..b1618ca9 100644 --- a/doubleml/tests/test_blp.py +++ b/doubleml/tests/test_blp.py @@ -37,6 +37,11 @@ def dml_blp_fixture(ci_joint, ci_level): ci_1 = blp.confint(random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000) np.random.seed(42) ci_2 = blp.confint(joint=ci_joint, level=ci_level, n_rep_boot=1000) + expected_ci_2 = np.vstack(( + blp.blp_model.conf_int(alpha=(1-ci_level)/2)[0], + blp.blp_model.params, + blp.blp_model.conf_int(alpha=(1-ci_level)/2)[1])).T + np.random.seed(42) ci_manual = blp_confint(blp_manual, random_basis, joint=ci_joint, level=ci_level, n_rep_boot=1000) @@ -50,6 +55,7 @@ def dml_blp_fixture(ci_joint, ci_level): 'signal': blp.orth_signal, 'ci_1': ci_1, 'ci_2': ci_2, + 'expected_ci_2': expected_ci_2, 'ci_manual': ci_manual, 'blp_model': blp, 'unfitted_blp_model': blp_obj} @@ -79,14 +85,14 @@ def test_dml_blp_omega(dml_blp_fixture): @pytest.mark.ci -def test_dml_blp_ci_1(dml_blp_fixture): - assert np.allclose(dml_blp_fixture['ci_1'], +def test_dml_blp_ci_2(dml_blp_fixture): + assert np.allclose(dml_blp_fixture['expected_ci_2'], dml_blp_fixture['ci_2'], rtol=1e-9, atol=1e-4) @pytest.mark.ci -def test_dml_blp_ci_2(dml_blp_fixture): +def test_dml_blp_ci_1(dml_blp_fixture): assert np.allclose(dml_blp_fixture['ci_1'], dml_blp_fixture['ci_manual'], rtol=1e-9, atol=1e-4) diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index c7b61d12..b46c04cd 
100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -3,7 +3,8 @@ import numpy as np from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, \ - DoubleMLClusterData, DoubleMLPQ, DoubleMLLPQ, DoubleMLCVAR, DoubleMLQTE, DoubleMLDID, DoubleMLDIDCS + DoubleMLClusterData, DoubleMLPQ, DoubleMLLPQ, DoubleMLCVAR, DoubleMLQTE, DoubleMLDID, \ + DoubleMLDIDCS, DoubleMLBLP from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \ make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020 from doubleml.double_ml_data import DoubleMLBaseData @@ -1256,6 +1257,20 @@ def test_doubleml_nan_prediction(): _ = DoubleMLPLR(dml_data, ml_l, LassoWithInfPred()).fit() +@pytest.mark.ci +def test_doubleml_warning_blp(): + n = 5 + np.random.seed(42) + random_basis = pd.DataFrame(np.random.normal(0, 1, size=(n, 3))) + random_signal = np.random.normal(0, 1, size=(n, )) + blp = DoubleMLBLP(random_signal, random_basis) + blp.fit() + + msg = 'Returning pointwise confidence intervals for basis coefficients.' 
+ with pytest.warns(UserWarning, match=msg): + _ = blp.confint(joint=True) + + @pytest.mark.ci def test_doubleml_exception_gate(): dml_irm_obj = DoubleMLIRM(dml_data_irm, diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index e4842ffa..dca5cef4 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -199,7 +199,7 @@ def test_dml_irm_cate_gate(): gate_1 = dml_irm_obj.gate(groups_1) assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_1.confint(), pd.DataFrame) - assert all(gate_1.confint().index == groups_1.columns) + assert all(gate_1.confint().index == ['ATE'] + groups_1.columns.to_list()) np.random.seed(42) groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) @@ -208,4 +208,4 @@ def test_dml_irm_cate_gate(): gate_2 = dml_irm_obj.gate(groups_2) assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_2.confint(), pd.DataFrame) - assert all(gate_2.confint().index == ["Group_1", "Group_2"]) + assert all(gate_2.confint().index == ["ATE", "Group_1", "Group_2"]) diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index fd184be9..d1b22935 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -316,7 +316,7 @@ def test_dml_plr_cate_gate(score, dml_procedure): gate_1 = dml_plr_obj.gate(groups_1) assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_1.confint(), pd.DataFrame) - assert all(gate_1.confint().index == groups_1.columns) + assert all(gate_1.confint().index == ["ATE"] + groups_1.columns.tolist()) np.random.seed(42) groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) @@ -325,4 +325,4 @@ def test_dml_plr_cate_gate(score, dml_procedure): gate_2 = dml_plr_obj.gate(groups_2) assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_2.confint(), pd.DataFrame) - assert all(gate_2.confint().index == ["Group_1", "Group_2"]) + assert all(gate_2.confint().index == ["ATE", "Group_1", 
"Group_2"]) From db30d3c9eb6a1812a756d321ae73b8d55afed4eb Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 16:48:45 +0100 Subject: [PATCH 068/134] add dataset for hte --- doubleml/datasets.py | 112 ++++++++++++++++++++++++++++++++ doubleml/tests/test_datasets.py | 34 +++++++++- 2 files changed, 145 insertions(+), 1 deletion(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 02bac5ea..60926692 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1228,3 +1228,115 @@ def f_g(beta_a): 'oracle_values': oracle_values} return res_dict + + +def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): + """ + Creates a simple synthetic example for heterogeneous treatment effects. + The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019) and a notebook from EconML. + + The data is generated as + + .. math:: + + Y_i & = g(X_i)T_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i + + T_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, + + where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i + \\sim\\mathcal{U}[-1,1]``. + If the treatment is set to be binary, the treatment is generated as + + .. math:: + T_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. + + The coefficient vectors :math:`\\gamma_0` and :math:`\beta_0` both have small random (identical) support + which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. + Further, :math:`g(x)` defines the conditional treatment effect, which is defined differently depending + on the dimension of :math:`x`. + + If the heterogeneity is univariate the conditional treatment effect takes the following form + + .. math:: + g(x) = \\exp(2x_1) + 3\\sin(4x_1), + + whereas for the two-dimensional case the conditional treatment effect is defined as + + .. math:: + g(x) = \\exp(2x_1) + 3\\sin(4x_2). 
+ + Parameters + ---------- + n_obs : int + Number of observations to simulate. + Default is ``200``. + + p : int + Dimension of covariates. + Default is ``30``. + + support_size : int + Number of relevant (confounding) covariates. + Default is ``5``. + + n_x : int + Dimension of the heterogeneity. Can be either ``1`` or ``2``. + Default is ``1``. + + binary_treatment : bool + Indicates whether the treatment is binary. + Default is ``False``. + + Returns + ------- + res_dict : dictionary + Dictionary with entries ``data``, ``treatment_effect``. + + """ + # simple input checks + assert n_x in [1, 2], 'n_x must be either 1 or 2.' + assert support_size <= p, 'support_size must be smaller than p.' + assert isinstance(binary_treatment, bool), 'binary_treatment must be a boolean.' + + # define treatment effects + def treatment_effect_1d(x): + te = np.exp(2 * x[0]) + 3 * np.sin(4 * x[0]) + return te + + def treatment_effect_2d(x): + te = np.exp(2 * x[0]) + 3 * np.sin(4 * x[1]) + return te + + # Outcome support and coefficients + support_y = np.random.choice(np.arange(p), size=support_size, replace=False) + coefs_y = np.random.uniform(0, 1, size=support_size) + # treatment support and coefficients + support_d = support_y + coefs_d = np.random.uniform(0, 1, size=support_size) + + # noise + epsilon = np.random.uniform(-1, 1, size=n_obs) + eta = np.random.uniform(-1, 1, size=n_obs) + + # Generate controls, covariates, treatments and outcomes + x = np.random.uniform(0, 1, size=(n_obs, p)) + # Heterogeneous treatment effects + if n_x == 1: + te = np.array([treatment_effect_1d(x_i) for x_i in x]).reshape(-1) + elif n_x == 2: + te = np.array([treatment_effect_2d(x_i) for x_i in x]).reshape(-1) + d = np.dot(x[:, support_d], coefs_d) + eta + y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon + + # Now we build the dataset + y_df = pd.DataFrame({'y': y}) + d_df = pd.DataFrame({'d': d}) + x_df = pd.DataFrame( + data=x, + index=np.arange(x.shape[0]), + columns=[f'X_{i}' for i in 
range(x.shape[1])] + ) + + data = pd.concat([y_df, d_df, x_df], axis=1) + res_dict = {'data': data, 'treatment_effect': te} + return res_dict diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index cb39e8f0..01e2612d 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -5,7 +5,7 @@ from doubleml import DoubleMLData, DoubleMLClusterData from doubleml.datasets import fetch_401K, fetch_bonus, make_plr_CCDDHNR2018, make_plr_turrell2018, \ make_irm_data, make_iivm_data, _make_pliv_data, make_pliv_CHS2015, make_pliv_multiway_cluster_CKMS2021, \ - make_did_SZ2020, make_confounded_irm_data, make_confounded_plr_data + make_did_SZ2020, make_confounded_irm_data, make_confounded_plr_data, make_heterogeneous_data msg_inv_return_type = 'Invalid return_type.' @@ -227,3 +227,35 @@ def test_make_confounded_plr_data_return_types(): assert isinstance(res['oracle_values']['beta_a'], float) assert isinstance(res['oracle_values']['a'], np.ndarray) assert isinstance(res['oracle_values']['z'], np.ndarray) + + +@pytest.fixture(scope='function', + params=[False, True]) +def binary_treatment(request): + return request.param + + +@pytest.fixture(scope='function', + params=[1, 2]) +def n_x(request): + return request.param + + +@pytest.mark.ci +def test_make_heterogeneous_data_return_types(binary_treatment, n_x): + np.random.seed(3141) + res = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=binary_treatment) + assert isinstance(res, dict) + assert isinstance(res['data'], pd.DataFrame) + assert isinstance(res['treatment_effect'], np.ndarray) + + # test input checks + msg = 'n_x must be either 1 or 2.' + with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=0, binary_treatment=binary_treatment) + msg = 'support_size must be smaller than p.' 
+ with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=n_x, support_size=31, binary_treatment=binary_treatment) + msg = 'binary_treatment must be a boolean.' + with pytest.raises(AssertionError, match=msg): + _ = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=2) From ad9059ec82e72d57c7a0501533d1a8b51608ea98 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 17:02:16 +0100 Subject: [PATCH 069/134] Update datasets.py --- doubleml/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 60926692..19a8db33 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1233,7 +1233,7 @@ def f_g(beta_a): def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treatment=False): """ Creates a simple synthetic example for heterogeneous treatment effects. - The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019) and a notebook from EconML. + The data generating process is based on the Monte Carlo simulation from Oprescu et al. (2019). The data is generated as From 39000e53d7ca5457df0df77c7cebdea13edd299b Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 17:22:38 +0100 Subject: [PATCH 070/134] add treatment effect callable --- doubleml/datasets.py | 29 ++++++++++++++--------------- doubleml/tests/test_datasets.py | 3 ++- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 19a8db33..fd1a6ccc 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1258,12 +1258,12 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat If the heterogeneity is univariate the conditional treatment effect takes the following form .. 
math:: - g(x) = \\exp(2x_1) + 3\\sin(4x_1), + g(x) = \\exp(2x_0) + 3\\sin(4x_0), whereas for the two-dimensional case the conditional treatment effect is defined as .. math:: - g(x) = \\exp(2x_1) + 3\\sin(4x_2). + g(x) = \\exp(2x_0) + 3\\sin(4x_1). Parameters ---------- @@ -1290,7 +1290,7 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat Returns ------- res_dict : dictionary - Dictionary with entries ``data``, ``treatment_effect``. + Dictionary with entries ``data``, ``effects``, ``treatment_effect``. """ # simple input checks @@ -1299,13 +1299,12 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat assert isinstance(binary_treatment, bool), 'binary_treatment must be a boolean.' # define treatment effects - def treatment_effect_1d(x): - te = np.exp(2 * x[0]) + 3 * np.sin(4 * x[0]) - return te - - def treatment_effect_2d(x): - te = np.exp(2 * x[0]) + 3 * np.sin(4 * x[1]) - return te + if n_x == 1: + def treatment_effect(x): + return np.exp(2 * x[0]) + 3 * np.sin(4 * x[0]) + elif n_x == 2: + def treatment_effect(x): + return np.exp(2 * x[0]) + 3 * np.sin(4 * x[1]) # Outcome support and coefficients support_y = np.random.choice(np.arange(p), size=support_size, replace=False) @@ -1321,10 +1320,7 @@ def treatment_effect_2d(x): # Generate controls, covariates, treatments and outcomes x = np.random.uniform(0, 1, size=(n_obs, p)) # Heterogeneous treatment effects - if n_x == 1: - te = np.array([treatment_effect_1d(x_i) for x_i in x]).reshape(-1) - elif n_x == 2: - te = np.array([treatment_effect_2d(x_i) for x_i in x]).reshape(-1) + te = np.array([treatment_effect(x_i) for x_i in x]).reshape(-1) d = np.dot(x[:, support_d], coefs_d) + eta y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon @@ -1338,5 +1334,8 @@ def treatment_effect_2d(x): ) data = pd.concat([y_df, d_df, x_df], axis=1) - res_dict = {'data': data, 'treatment_effect': te} + res_dict = { + 'data': data, + 'effects': te, + 'treatment_effect': 
treatment_effect} return res_dict diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index 01e2612d..2d414798 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -247,7 +247,8 @@ def test_make_heterogeneous_data_return_types(binary_treatment, n_x): res = make_heterogeneous_data(n_obs=100, n_x=n_x, binary_treatment=binary_treatment) assert isinstance(res, dict) assert isinstance(res['data'], pd.DataFrame) - assert isinstance(res['treatment_effect'], np.ndarray) + assert isinstance(res['effects'], np.ndarray) + assert callable(res['treatment_effect']) # test input checks msg = 'n_x must be either 1 or 2.' From 2e8740f97b7592933e756ae5f10281e890d236b1 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 18:03:09 +0100 Subject: [PATCH 071/134] Update datasets.py --- doubleml/datasets.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index fd1a6ccc..8fcdb46f 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1301,10 +1301,10 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat # define treatment effects if n_x == 1: def treatment_effect(x): - return np.exp(2 * x[0]) + 3 * np.sin(4 * x[0]) + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) elif n_x == 2: def treatment_effect(x): - return np.exp(2 * x[0]) + 3 * np.sin(4 * x[1]) + return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) # Outcome support and coefficients support_y = np.random.choice(np.arange(p), size=support_size, replace=False) @@ -1320,7 +1320,7 @@ def treatment_effect(x): # Generate controls, covariates, treatments and outcomes x = np.random.uniform(0, 1, size=(n_obs, p)) # Heterogeneous treatment effects - te = np.array([treatment_effect(x_i) for x_i in x]).reshape(-1) + te = treatment_effect(x) d = np.dot(x[:, support_d], coefs_d) + eta y = te * d + np.dot(x[:, 
support_y], coefs_y) + epsilon From d323dad82ec38353f4dbc2847490eece62d90369 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 1 Dec 2023 18:09:29 +0100 Subject: [PATCH 072/134] Update datasets.py --- doubleml/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 8fcdb46f..33dc273c 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1250,7 +1250,7 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat .. math:: T_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. - The coefficient vectors :math:`\\gamma_0` and :math:`\beta_0` both have small random (identical) support + The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. Further, :math:`g(x)` defines the conditional treatment effect, which is defined differently depending on the dimension of :math:`x`. 
From 950ae57c37a3c4ec65d384b263ae6feac883b139 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Sun, 3 Dec 2023 12:58:08 +0100 Subject: [PATCH 073/134] Update datasets.py --- doubleml/datasets.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 33dc273c..19d94184 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1302,7 +1302,10 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat if n_x == 1: def treatment_effect(x): return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 0]) - elif n_x == 2: + else: + assert n_x == 2 + + # redefine treatment effect def treatment_effect(x): return np.exp(2 * x[:, 0]) + 3 * np.sin(4 * x[:, 1]) From e0fc5118160614b01650aa4c2a2837d031e85cf3 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Sun, 3 Dec 2023 14:01:48 +0100 Subject: [PATCH 074/134] Update datasets.py --- doubleml/datasets.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 19d94184..01415caf 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1314,7 +1314,7 @@ def treatment_effect(x): coefs_y = np.random.uniform(0, 1, size=support_size) # treatment support and coefficients support_d = support_y - coefs_d = np.random.uniform(0, 1, size=support_size) + coefs_d = np.random.uniform(0, 0.3, size=support_size) # noise epsilon = np.random.uniform(-1, 1, size=n_obs) @@ -1324,7 +1324,10 @@ def treatment_effect(x): x = np.random.uniform(0, 1, size=(n_obs, p)) # Heterogeneous treatment effects te = treatment_effect(x) - d = np.dot(x[:, support_d], coefs_d) + eta + if binary_treatment: + d = 1.0 * (np.dot(x[:, support_d], coefs_d) >= eta) + else: + d = np.dot(x[:, support_d], coefs_d) + eta y = te * d + np.dot(x[:, support_y], coefs_y) + epsilon # Now we build the dataset From 
fbe6fbf77c748c6d6dcc3866bc2c0d96a009120a Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 4 Dec 2023 11:51:25 +0100 Subject: [PATCH 075/134] fix gates --- doubleml/double_ml_irm.py | 10 ++-------- doubleml/double_ml_plr.py | 10 ++-------- doubleml/tests/test_irm.py | 4 ++-- doubleml/tests/test_plr.py | 4 ++-- 4 files changed, 8 insertions(+), 20 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 14285658..d8723189 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -438,7 +438,7 @@ def gate(self, groups): Parameters ---------- groups : :class:`pandas.DataFrame` - The group indicator for estimating the best linear predictor. + The group indicator for estimating the best linear predictor. Groups should be mutually exclusive. Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str). @@ -461,13 +461,7 @@ def gate(self, groups): if any(groups.sum(0) <= 5): warnings.warn('At least one group effect is estimated with less than 6 observations.') - # add intercept for ATE to groups - basis = groups.copy(deep=True) - basis.insert(0, "ATE", [True] * groups.shape[0]) - # convert to float - basis = basis.astype(float) - - model = self.cate(basis, is_gate=True) + model = self.cate(groups, is_gate=True) return model def policy_tree(self, features, depth=2, **tree_params): diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 75dbbd7d..8d730daf 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -373,7 +373,7 @@ def gate(self, groups): Parameters ---------- groups : :class:`pandas.DataFrame` - The group indicator for estimating the best linear predictor. + The group indicator for estimating the best linear predictor. Groups should be mutually exclusive. 
Has to be dummy coded with shape ``(n_obs, d)``, where ``n_obs`` is the number of observations and ``d`` is the number of groups or ``(n_obs, 1)`` and contain the corresponding groups (as str). @@ -396,13 +396,7 @@ def gate(self, groups): if any(groups.sum(0) <= 5): warnings.warn('At least one group effect is estimated with less than 6 observations.') - # add intercept for ATE to groups - basis = groups.copy(deep=True) - basis.insert(0, "ATE", [True] * groups.shape[0]) - # convert to float - basis = basis.astype(float) - - model = self.cate(basis, is_gate=True) + model = self.cate(groups, is_gate=True) return model def _partial_out(self): diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index dca5cef4..8b7c4a2b 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -199,7 +199,7 @@ def test_dml_irm_cate_gate(): gate_1 = dml_irm_obj.gate(groups_1) assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_1.confint(), pd.DataFrame) - assert all(gate_1.confint().index == ['ATE'] + groups_1.columns.to_list()) + assert all(gate_1.confint().index == groups_1.columns.to_list()) np.random.seed(42) groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) @@ -208,4 +208,4 @@ def test_dml_irm_cate_gate(): gate_2 = dml_irm_obj.gate(groups_2) assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_2.confint(), pd.DataFrame) - assert all(gate_2.confint().index == ["ATE", "Group_1", "Group_2"]) + assert all(gate_2.confint().index == ["Group_1", "Group_2"]) diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index d1b22935..1d1d75ca 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -316,7 +316,7 @@ def test_dml_plr_cate_gate(score, dml_procedure): gate_1 = dml_plr_obj.gate(groups_1) assert isinstance(gate_1, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_1.confint(), pd.DataFrame) - assert all(gate_1.confint().index == ["ATE"] + 
groups_1.columns.tolist()) + assert all(gate_1.confint().index == groups_1.columns.tolist()) np.random.seed(42) groups_2 = pd.DataFrame(np.random.choice(["1", "2"], n)) @@ -325,4 +325,4 @@ def test_dml_plr_cate_gate(score, dml_procedure): gate_2 = dml_plr_obj.gate(groups_2) assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_2.confint(), pd.DataFrame) - assert all(gate_2.confint().index == ["ATE", "Group_1", "Group_2"]) + assert all(gate_2.confint().index == ["Group_1", "Group_2"]) From 3186098d775eb7aed82b8c8c577257b5cb302df9 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 4 Dec 2023 12:04:06 +0100 Subject: [PATCH 076/134] fix documentation --- doubleml/datasets.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index 01415caf..e991ae05 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1239,31 +1239,31 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat .. math:: - Y_i & = g(X_i)T_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i + Y_i & = \\theta_0(X_i)D_i + \\langle X_i,\\gamma_0\\rangle + \\epsilon_i - T_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, + D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i \\sim\\mathcal{U}[-1,1]``. If the treatment is set to be binary, the treatment is generated as .. math:: - T_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. + D_i = 1\\{\\langle X_i,\\beta_0\\rangle \\ge \\eta_i\\}. The coefficient vectors :math:`\\gamma_0` and :math:`\\beta_0` both have small random (identical) support which values are drawn independently from :math:`\\mathcal{U}[0,1]` and :math:`\\mathcal{U}[0,0.3]`. 
- Further, :math:`g(x)` defines the conditional treatment effect, which is defined differently depending + Further, :math:`\\theta_0(x)` defines the conditional treatment effect, which is defined differently depending on the dimension of :math:`x`. If the heterogeneity is univariate the conditional treatment effect takes the following form .. math:: - g(x) = \\exp(2x_0) + 3\\sin(4x_0), + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_0), whereas for the two-dimensional case the conditional treatment effect is defined as .. math:: - g(x) = \\exp(2x_0) + 3\\sin(4x_1). + \\theta_0(x) = \\exp(2x_0) + 3\\sin(4x_1). Parameters ---------- From 875f9a2568dcf599fe7a2ed82132ba029d830fc8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 4 Dec 2023 14:44:45 +0100 Subject: [PATCH 077/134] fix docstring --- doubleml/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/datasets.py b/doubleml/datasets.py index e991ae05..f534147e 100644 --- a/doubleml/datasets.py +++ b/doubleml/datasets.py @@ -1244,7 +1244,7 @@ def make_heterogeneous_data(n_obs=200, p=30, support_size=5, n_x=1, binary_treat D_i & = \\langle X_i,\\beta_0\\rangle + \\eta_i, where :math:`X_i\\sim\\mathcal{U}[0,1]^{p}` and :math:`\\epsilon_i,\\eta_i - \\sim\\mathcal{U}[-1,1]``. + \\sim\\mathcal{U}[-1,1]`. If the treatment is set to be binary, the treatment is generated as .. 
math:: From 79bb38d971aba08079270f230d1383a52f0dedb6 Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 4 Dec 2023 17:01:46 +0100 Subject: [PATCH 078/134] additional checks for the weighted irm --- doubleml/_utils_checks.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/doubleml/_utils_checks.py b/doubleml/_utils_checks.py index c8c3cd48..94bf239d 100644 --- a/doubleml/_utils_checks.py +++ b/doubleml/_utils_checks.py @@ -227,19 +227,22 @@ def _check_benchmarks(benchmarks): f'{str(benchmarks["name"][i])} of type {str(type(benchmarks["name"][i]))} was passed.') return -def _check_weights(weights, score, n_obs, n_treat): +def _check_weights(weights, score, n_obs): if weights is not None: if score != "ATE": raise NotImplementedError("weights can only be set for score type 'ATE'. " f"{score} was passed.") if not isinstance(weights, np.ndarray): - raise ValueError("weights must be a numpy array. " + raise TypeError("weights must be a numpy array. " f"weights of type {str(type(weights))} was passed.") - if not np.all((0 <= weights) & (weights <= 1)): - raise ValueError("All weights values must be between 0 and 1") - if len(weights.shape) != 1 or weights.shape[0] != n_obs: - raise ValueError(f"weights must have shape ({n_obs},). " + if not np.all(0 <= weights): + raise ValueError("All weights values must be greater or equal 0.") + if (weights.ndim != 1 and weights.ndim != 2) or weights.shape[0] != n_obs: + raise ValueError(f"weights must have shape ({n_obs},) or ({n_obs},2). " + f"weights of shape {weights.shape} was passed.") + if weights.ndim == 2 and weights.shape[1] != 2: + raise ValueError(f"weights must have shape ({n_obs},) or ({n_obs},2). 
" f"weights of shape {weights.shape} was passed.") if weights.sum() == 0: - raise ValueError(f"At least one weight must be non-zero.") + raise ValueError("At least one weight must be non-zero.") return \ No newline at end of file From 57df566b2f36b79cd5db610055ccbf0dc071ca64 Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 4 Dec 2023 17:02:11 +0100 Subject: [PATCH 079/134] updated weights implementation --- doubleml/double_ml_irm.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 3118afa5..91a44e30 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -49,7 +49,7 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): weights : array or None An numpy array of weights for each individual observation. If None, then the ``'ATE'`` score - is applied. Can only be used with ``score = 'ATE'``. + is applied. Can only be used with ``score = 'ATE'``. Has to be shape (n,) or (n,2). Default is ``None``. dml_procedure : str @@ -167,9 +167,8 @@ def __init__(self, self._sensitivity_implemented = True - _check_weights(weights, score, obj_dml_data.n_obs, obj_dml_data.n_treat) - if weights is not None: - self._weights = weights + _check_weights(weights, score, obj_dml_data.n_obs) + self._weights, self._weights_bar = self._initialize_weights(weights) @property def normalize_ipw(self): @@ -197,13 +196,21 @@ def weights(self): """ Specifies the weights for a weighted ATE. 
""" - return self._weights if hasattr(self,"_weights") else None + return np.c_[self._weights, self._weights_bar] def _initialize_ml_nuisance_params(self): valid_learner = ['ml_g0', 'ml_g1', 'ml_m'] self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in valid_learner} + def _initialize_weights(self, weights): + if weights is None: + weights = np.ones(self._dml_data.n_obs) + if weights.ndim == 1: + return weights, weights + else: + return weights[:, 0], weights[:, 1] + def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError('The data must be of DoubleMLData type. ' @@ -316,9 +323,9 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): if isinstance(self.score, str): if self.score == 'ATE': - psi_b = g_hat1 - g_hat0 \ - + np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat) + psi_b = self._weights * (g_hat1 - g_hat0) \ + + self._weights_bar * (np.divide(np.multiply(d, u_hat1), m_hat) \ + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat)) psi_a = np.full_like(m_hat, -1.0) else: assert self.score == 'ATTE' @@ -345,8 +352,8 @@ def _sensitivity_element_est(self, preds): # use weights make this extendable if self.score == 'ATE': - weights = np.ones_like(d) - weights_bar = np.ones_like(d) + weights = self._weights + weights_bar = self._weights_bar else: assert self.score == 'ATTE' weights = np.divide(d, np.mean(d)) From 92c55cba83ee940dbdc526d4273da4a094020fb6 Mon Sep 17 00:00:00 2001 From: Schacht <65898638+OliverSchacht@users.noreply.github.com> Date: Mon, 4 Dec 2023 17:02:37 +0100 Subject: [PATCH 080/134] add tests for weights --- doubleml/tests/test_doubleml_exceptions.py | 26 ++++++++++++++++ .../tests/test_doubleml_model_defaults.py | 1 + doubleml/tests/test_irm.py | 30 +++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/doubleml/tests/test_doubleml_exceptions.py 
b/doubleml/tests/test_doubleml_exceptions.py index bb5a75bd..c1d8cb8e 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -425,6 +425,32 @@ def test_doubleml_exception_trimming_rule(): trimming_rule='truncate', trimming_threshold=0.6) +@pytest.mark.ci +def test_doubleml_exception_weights(): + msg = "weights can only be set for score type 'ATE'. ATTE was passed." + with pytest.raises(NotImplementedError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + score='ATTE', weights = np.ones_like(dml_data_irm.d)) + msg = "weights must be a numpy array. weights of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights = pd.DataFrame(np.ones_like(dml_data_irm.d))) + msg = "All weights values must be greater or equal 0." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights = -1 * np.ones_like(dml_data_irm.d)) + msg = f"weights must have shape ({n},) or ({n},2). weights of shape ({n/2},) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights = np.ones(n/2)) + msg = f"weights must have shape ({n},) or ({n},2). weights of shape ({n},3) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights = np.ones((n,3))) + msg = "At least one weight must be non-zero." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights = np.zeros((dml_data_irm.d.shape[0],2))) + + @pytest.mark.ci def test_doubleml_exception_quantiles(): msg = "Quantile has to be a float. Object of type passed." 
diff --git a/doubleml/tests/test_doubleml_model_defaults.py b/doubleml/tests/test_doubleml_model_defaults.py index 9298dba3..056ddbbc 100644 --- a/doubleml/tests/test_doubleml_model_defaults.py +++ b/doubleml/tests/test_doubleml_model_defaults.py @@ -102,6 +102,7 @@ def test_irm_defaults(): assert dml_irm.trimming_rule == 'truncate' assert dml_irm.trimming_threshold == 1e-2 assert not dml_irm.normalize_ipw + assert dml_irm.weights == np.ones((500,3)) @pytest.mark.ci diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index e4842ffa..ee633abe 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -209,3 +209,33 @@ def test_dml_irm_cate_gate(): assert isinstance(gate_2, dml.double_ml_blp.DoubleMLBLP) assert isinstance(gate_2.confint(), pd.DataFrame) assert all(gate_2.confint().index == ["Group_1", "Group_2"]) + +@pytest.mark.ci +def test_dml_irm_weights(): + n = 5000 + # collect data + np.random.seed(42) + obj_dml_data = make_irm_data(n_obs=n, dim_x=2) + + # First stage estimation + ml_g = LinearRegression() + ml_m = LogisticRegression(penalty='none', random_state=42) + + dml_irm_obj_ate = dml.DoubleMLIRM(obj_dml_data, + ml_m=ml_m, + ml_g=ml_g, + trimming_threshold=0.05, + n_folds=5) + + dml_irm_obj_atte = dml.DoubleMLIRM(obj_dml_data, + ml_m=ml_m, + ml_g=ml_g, + trimming_threshold=0.05, + n_folds=5, + score = 'ATTE') + + dml_irm_obj_weights = dml.DoubleMLIRM(obj_dml_data, + ml_m=ml_m, + ml_g=ml_g, + trimming_threshold=0.05, + n_folds=5) From 5235b6cf099ef0fc3f4f7e7088c97e4e957ae5c9 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 5 Dec 2023 14:56:12 +0100 Subject: [PATCH 081/134] update irm p_hat estimation and fix format --- doubleml/double_ml_irm.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 91a44e30..eee8574c 100644 --- a/doubleml/double_ml_irm.py +++ 
b/doubleml/double_ml_irm.py @@ -12,7 +12,8 @@ from .double_ml_score_mixins import LinearScoreMixin from ._utils import _dml_cv_predict, _get_cond_smpls, _dml_tune, _trimm, _normalize_ipw -from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer, _check_weights +from ._utils_checks import _check_score, _check_trimming, _check_finite_predictions, _check_is_propensity, _check_integer, \ + _check_weights class DoubleMLIRM(LinearScoreMixin, DoubleML): @@ -166,7 +167,7 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True - + _check_weights(weights, score, obj_dml_data.n_obs) self._weights, self._weights_bar = self._initialize_weights(weights) @@ -190,7 +191,7 @@ def trimming_threshold(self): Specifies the used trimming threshold. """ return self._trimming_threshold - + @property def weights(self): """ @@ -304,9 +305,7 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): # fraction of treated for ATTE p_hat = None if self.score == 'ATTE': - p_hat = np.full_like(d, np.nan, dtype='float64') - for _, test_index in smpls: - p_hat[test_index] = np.mean(d[test_index]) + p_hat = np.mean(d) if self.normalize_ipw: if self.dml_procedure == 'dml1': @@ -324,8 +323,9 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): if isinstance(self.score, str): if self.score == 'ATE': psi_b = self._weights * (g_hat1 - g_hat0) \ - + self._weights_bar * (np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat)) + + self._weights_bar * ( + np.divide(np.multiply(d, u_hat1), m_hat) + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat)) psi_a = np.full_like(m_hat, -1.0) else: assert self.score == 'ATTE' From f94e10f927a67b25477f9011743e90446eea6598 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 5 Dec 2023 14:58:40 +0100 Subject: [PATCH 082/134] 
add basic weights test --- doubleml/tests/test_irm.py | 121 ++++++++++++++++++++++++++++++------- 1 file changed, 100 insertions(+), 21 deletions(-) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index ee633abe..bedf85c4 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -10,6 +10,7 @@ import doubleml as dml from doubleml.datasets import make_irm_data +from doubleml._utils_resampling import DoubleMLResampling from ._utils import draw_smpls from ._utils_irm_manual import fit_irm, boot_irm, fit_sensitivity_elements_irm @@ -210,32 +211,110 @@ def test_dml_irm_cate_gate(): assert isinstance(gate_2.confint(), pd.DataFrame) assert all(gate_2.confint().index == ["Group_1", "Group_2"]) -@pytest.mark.ci -def test_dml_irm_weights(): - n = 5000 + +@pytest.fixture(scope='module') +def dml_irm_weights_fixture(): + n = 10000 # collect data np.random.seed(42) obj_dml_data = make_irm_data(n_obs=n, dim_x=2) + smpls = DoubleMLResampling( + n_folds=5, + n_rep=1, + n_obs=n, + apply_cross_fitting=True, + stratify=obj_dml_data.d).split_samples() + # First stage estimation ml_g = LinearRegression() ml_m = LogisticRegression(penalty='none', random_state=42) - dml_irm_obj_ate = dml.DoubleMLIRM(obj_dml_data, - ml_m=ml_m, - ml_g=ml_g, - trimming_threshold=0.05, - n_folds=5) - - dml_irm_obj_atte = dml.DoubleMLIRM(obj_dml_data, - ml_m=ml_m, - ml_g=ml_g, - trimming_threshold=0.05, - n_folds=5, - score = 'ATTE') - - dml_irm_obj_weights = dml.DoubleMLIRM(obj_dml_data, - ml_m=ml_m, - ml_g=ml_g, - trimming_threshold=0.05, - n_folds=5) + # ATE with and without weights + dml_irm_obj_ate_no_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + ml_m=clone(ml_m), + score='ATTE', + trimming_threshold=0.05, + n_folds=5, + draw_sample_splitting=False) + dml_irm_obj_ate_no_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_ate_no_weights.fit() + + dml_irm_obj_ate_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + 
ml_m=clone(ml_m), + score='ATE', + trimming_threshold=0.05, + n_folds=5, + draw_sample_splitting=False, + weights=np.ones_like(obj_dml_data.y)) + dml_irm_obj_ate_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_ate_weights.fit() + + # ATTE with and without weights + dml_irm_obj_atte_no_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + ml_m=clone(ml_m), + score='ATTE', + trimming_threshold=0.05, + n_folds=5, + draw_sample_splitting=False) + dml_irm_obj_atte_no_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_atte_no_weights.fit() + + m_hat = dml_irm_obj_atte_no_weights.predictions["ml_m"][:, :, 0] + p_hat = obj_dml_data.d.mean() + weights = obj_dml_data.d.reshape(-1, 1) / p_hat + weights_bar = m_hat / p_hat + combined_weights = np.concatenate((weights, weights_bar), axis=1) + dml_irm_obj_atte_weights = dml.DoubleMLIRM( + obj_dml_data, + ml_g=clone(ml_g), + ml_m=clone(ml_m), + trimming_threshold=0.05, + n_folds=5, + draw_sample_splitting=False, + weights=combined_weights) + dml_irm_obj_atte_weights.set_sample_splitting(smpls) + np.random.seed(42) + dml_irm_obj_atte_weights.fit() + + res_dict = { + 'coef_ate': dml_irm_obj_ate_no_weights.coef, + 'coef_ate_weights': dml_irm_obj_ate_weights.coef, + 'coef_atte': dml_irm_obj_atte_no_weights.coef, + 'coef_atte_weights': dml_irm_obj_atte_weights.coef, + 'se_ate': dml_irm_obj_ate_no_weights.se, + 'se_ate_weights': dml_irm_obj_ate_weights.se, + 'se_atte': dml_irm_obj_atte_no_weights.se, + 'se_atte_weights': dml_irm_obj_atte_weights.se, + } + return res_dict + + +@pytest.mark.ci +def test_dml_irm_ate_weights(dml_irm_weights_fixture): + assert math.isclose(dml_irm_weights_fixture['coef_ate'], + dml_irm_weights_fixture['coef_ate_weights'], + rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose(dml_irm_weights_fixture['se_ate'], + dml_irm_weights_fixture['se_ate_weights'], + rel_tol=1e-9, abs_tol=1e-4) + + +@pytest.mark.ci +def 
test_dml_irm_atte_weights(dml_irm_weights_fixture): + assert math.isclose(dml_irm_weights_fixture['coef_atte'], + dml_irm_weights_fixture['coef_atte_weights'], + rel_tol=1e-9, abs_tol=1e-4) + # Remark that the scores are slightly different (Y instead of g(1,X) and coefficient of theta) + assert math.isclose(dml_irm_weights_fixture['se_atte'], + dml_irm_weights_fixture['se_atte_weights'], + rel_tol=1e-5, abs_tol=1e-3) From 93ef1980e65fa813f69be8612c7a6d6b685b9869 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 5 Dec 2023 15:04:28 +0100 Subject: [PATCH 083/134] adjust test for multiple repetitions --- doubleml/tests/test_irm.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index bedf85c4..ac06eb9f 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -212,8 +212,14 @@ def test_dml_irm_cate_gate(): assert all(gate_2.confint().index == ["Group_1", "Group_2"]) +@pytest.fixture(scope='module', + params=[1, 3]) +def n_rep(request): + return request.param + + @pytest.fixture(scope='module') -def dml_irm_weights_fixture(): +def dml_irm_weights_fixture(n_rep): n = 10000 # collect data np.random.seed(42) @@ -221,7 +227,7 @@ def dml_irm_weights_fixture(): smpls = DoubleMLResampling( n_folds=5, - n_rep=1, + n_rep=n_rep, n_obs=n, apply_cross_fitting=True, stratify=obj_dml_data.d).split_samples() @@ -235,9 +241,10 @@ def dml_irm_weights_fixture(): obj_dml_data, ml_g=clone(ml_g), ml_m=clone(ml_m), - score='ATTE', + score='ATE', trimming_threshold=0.05, n_folds=5, + dml_procedure='dml2', draw_sample_splitting=False) dml_irm_obj_ate_no_weights.set_sample_splitting(smpls) np.random.seed(42) @@ -250,6 +257,7 @@ def dml_irm_weights_fixture(): score='ATE', trimming_threshold=0.05, n_folds=5, + dml_procedure='dml2', draw_sample_splitting=False, weights=np.ones_like(obj_dml_data.y)) 
dml_irm_obj_ate_weights.set_sample_splitting(smpls) @@ -264,6 +272,7 @@ def dml_irm_weights_fixture(): score='ATTE', trimming_threshold=0.05, n_folds=5, + dml_procedure='dml2', draw_sample_splitting=False) dml_irm_obj_atte_no_weights.set_sample_splitting(smpls) np.random.seed(42) @@ -278,8 +287,10 @@ def dml_irm_weights_fixture(): obj_dml_data, ml_g=clone(ml_g), ml_m=clone(ml_m), + score='ATE', trimming_threshold=0.05, n_folds=5, + dml_procedure='dml2', draw_sample_splitting=False, weights=combined_weights) dml_irm_obj_atte_weights.set_sample_splitting(smpls) From 39bdab1a2b9d7b663df5d5eddc3665b29c28d24e Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 5 Dec 2023 16:00:03 +0100 Subject: [PATCH 084/134] unfinished version on to include repetitions --- doubleml/_utils_checks.py | 48 +++++++++++++++------- doubleml/double_ml_irm.py | 10 +++-- doubleml/tests/test_doubleml_exceptions.py | 39 ++++++++++++++---- 3 files changed, 71 insertions(+), 26 deletions(-) diff --git a/doubleml/_utils_checks.py b/doubleml/_utils_checks.py index 94bf239d..c71d62aa 100644 --- a/doubleml/_utils_checks.py +++ b/doubleml/_utils_checks.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import warnings from sklearn.utils.multiclass import type_of_target @@ -227,22 +228,39 @@ def _check_benchmarks(benchmarks): f'{str(benchmarks["name"][i])} of type {str(type(benchmarks["name"][i]))} was passed.') return -def _check_weights(weights, score, n_obs): + +def _check_weights(weights, score, n_obs, n_rep): if weights is not None: if score != "ATE": raise NotImplementedError("weights can only be set for score type 'ATE'. " f"{score} was passed.") - if not isinstance(weights, np.ndarray): - raise TypeError("weights must be a numpy array. 
" - f"weights of type {str(type(weights))} was passed.") - if not np.all(0 <= weights): - raise ValueError("All weights values must be greater or equal 0.") - if (weights.ndim != 1 and weights.ndim != 2) or weights.shape[0] != n_obs: - raise ValueError(f"weights must have shape ({n_obs},) or ({n_obs},2). " - f"weights of shape {weights.shape} was passed.") - if weights.ndim == 2 and weights.shape[1] != 2: - raise ValueError(f"weights must have shape ({n_obs},) or ({n_obs},2). " - f"weights of shape {weights.shape} was passed.") - if weights.sum() == 0: - raise ValueError("At least one weight must be non-zero.") - return \ No newline at end of file + if (not isinstance(weights, np.ndarray)) and (not isinstance(weights, dict)): + raise TypeError("weights must be a numpy array or dictionary. " + f"weights of type {str(type(weights))} was passed.") + if isinstance(weights, np.ndarray): + if (weights.ndim != 1) or weights.shape[0] != n_obs: + raise ValueError(f"weights must have shape ({n_obs},). " + f"weights of shape {weights.shape} was passed.") + if not np.all(0 <= weights): + raise ValueError("All weights values must be greater or equal 0.") + if weights.sum() == 0: + raise ValueError("At least one weight must be non-zero.") + + if isinstance(weights, dict): + expected_keys = ["weights", "weights_bar"] + if not set(weights.keys()) == set(expected_keys): + raise ValueError(f"weights must have keys {expected_keys}. " + f"keys {str(weights.keys())} were passed.") + + expected_shapes = [(n_obs,), (n_obs, n_rep)] + if weights["weights"].shape != expected_shapes[0]: + raise ValueError(f"weights must have shape {expected_shapes[0]}. " + f"weights of shape {weights['weights'].shape} was passed.") + if weights["weights_bar"].shape != expected_shapes[1]: + raise ValueError(f"weights must have shape {expected_shapes[1]}. 
" + f"weights of shape {weights['weights_bar'].shape} was passed.") + if (not np.all(weights["weights"] >= 0)) or (not np.all(weights["weights_bar"] >= 1)): + raise ValueError("All weights values must be greater or equal 0.") + if (weights["weights"].sum() == 0) or (weights["weights_bar"].sum() == 0): + raise ValueError("At least one weight must be non-zero.") + return diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index eee8574c..91dcfcff 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -48,9 +48,13 @@ class DoubleMLIRM(LinearScoreMixin, DoubleML): or a callable object / function with signature ``psi_a, psi_b = score(y, d, g_hat0, g_hat1, m_hat, smpls)``. Default is ``'ATE'``. - weights : array or None + weights : array, dict or None An numpy array of weights for each individual observation. If None, then the ``'ATE'`` score - is applied. Can only be used with ``score = 'ATE'``. Has to be shape (n,) or (n,2). + is applied (corresponds to weights equal to 1). Can only be used with ``score = 'ATE'``. + An array has to be of shape ``(n,)``, where ``n`` is the number of observations. + A dictionary can be used to specify weights which depend on the treatment variable. + In this case, the dictionary has to contain two keys ``weights`` and ``weights_bar``, where the values + have to be arrays of shape ``(n,)`` and ``(n, n_rep)``. Default is ``None``. 
dml_procedure : str @@ -168,7 +172,7 @@ def __init__(self, self._sensitivity_implemented = True - _check_weights(weights, score, obj_dml_data.n_obs) + _check_weights(weights, score, obj_dml_data.n_obs, self.n_rep) self._weights, self._weights_bar = self._initialize_weights(weights) @property diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index c1d8cb8e..55f02255 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -2,7 +2,7 @@ import pandas as pd import numpy as np -from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData,\ +from doubleml import DoubleMLPLR, DoubleMLIRM, DoubleMLIIVM, DoubleMLPLIV, DoubleMLData, \ DoubleMLClusterData, DoubleMLPQ, DoubleMLLPQ, DoubleMLCVAR, DoubleMLQTE, DoubleMLDID, DoubleMLDIDCS from doubleml.datasets import make_plr_CCDDHNR2018, make_irm_data, make_pliv_CHS2015, make_iivm_data, \ make_pliv_multiway_cluster_CKMS2021, make_did_SZ2020 @@ -430,25 +430,48 @@ def test_doubleml_exception_weights(): msg = "weights can only be set for score type 'ATE'. ATTE was passed." with pytest.raises(NotImplementedError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), - score='ATTE', weights = np.ones_like(dml_data_irm.d)) - msg = "weights must be a numpy array. weights of type was passed." + score='ATTE', weights=np.ones_like(dml_data_irm.d)) + msg = "weights must be a numpy array or dictionary. weights of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=1) + msg = r"weights must have keys ['weights', 'weights_bar']. keys dict_keys\(['d']\) were passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights={'d': [1, 2, 3]}) + + # shape checks + msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed." 
+ with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(1,)) + msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones((n, 2))) + + msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed." + with pytest.raises(ValueError, match=msg): + w = {'d': np.ones(1,)} + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(1,)) + msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones((n, 2))) + + # value checks with pytest.raises(TypeError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), - weights = pd.DataFrame(np.ones_like(dml_data_irm.d))) + weights=1) msg = "All weights values must be greater or equal 0." with pytest.raises(ValueError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), - weights = -1 * np.ones_like(dml_data_irm.d)) + weights=-1*np.ones_like(dml_data_irm.d)) msg = f"weights must have shape ({n},) or ({n},2). weights of shape ({n/2},) was passed." with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights = np.ones(n/2)) + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(n/2)) msg = f"weights must have shape ({n},) or ({n},2). weights of shape ({n},3) was passed." with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights = np.ones((n,3))) + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones((n, 3))) msg = "At least one weight must be non-zero." 
with pytest.raises(ValueError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), - weights = np.zeros((dml_data_irm.d.shape[0],2))) + weights=np.zeros((dml_data_irm.d.shape[0], ))) @pytest.mark.ci From 30dfff3fd364554bd70ab35e1eb5cd47c9326f17 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 5 Dec 2023 21:15:12 +0100 Subject: [PATCH 085/134] update exception and default tests --- doubleml/_utils_checks.py | 6 +-- doubleml/tests/test_doubleml_exceptions.py | 37 ++++++++++++------- .../tests/test_doubleml_model_defaults.py | 2 +- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/doubleml/_utils_checks.py b/doubleml/_utils_checks.py index c71d62aa..2d75b48d 100644 --- a/doubleml/_utils_checks.py +++ b/doubleml/_utils_checks.py @@ -257,9 +257,9 @@ def _check_weights(weights, score, n_obs, n_rep): raise ValueError(f"weights must have shape {expected_shapes[0]}. " f"weights of shape {weights['weights'].shape} was passed.") if weights["weights_bar"].shape != expected_shapes[1]: - raise ValueError(f"weights must have shape {expected_shapes[1]}. " - f"weights of shape {weights['weights_bar'].shape} was passed.") - if (not np.all(weights["weights"] >= 0)) or (not np.all(weights["weights_bar"] >= 1)): + raise ValueError(f"weights_bar must have shape {expected_shapes[1]}. 
" + f"weights_bar of shape {weights['weights_bar'].shape} was passed.") + if (not np.all(weights["weights"] >= 0)) or (not np.all(weights["weights_bar"] >= 0)): raise ValueError("All weights values must be greater or equal 0.") if (weights["weights"].sum() == 0) or (weights["weights_bar"].sum() == 0): raise ValueError("At least one weight must be non-zero.") diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index 55f02255..a8af9033 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -434,44 +434,55 @@ def test_doubleml_exception_weights(): msg = "weights must be a numpy array or dictionary. weights of type was passed." with pytest.raises(TypeError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=1) - msg = r"weights must have keys ['weights', 'weights_bar']. keys dict_keys\(['d']\) were passed." + msg = r"weights must have keys \['weights', 'weights_bar'\]. keys dict_keys\(\['d'\]\) were passed." with pytest.raises(ValueError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights={'d': [1, 2, 3]}) # shape checks msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed." with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(1,)) + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(1)) msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed." with pytest.raises(ValueError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones((n, 2))) msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed." 
with pytest.raises(ValueError, match=msg): - w = {'d': np.ones(1,)} - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(1,)) + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones(1), 'weights_bar': np.ones(1)}) msg = rf"weights must have shape \({n},\). weights of shape \({n}, 2\) was passed." with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones((n, 2))) + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones((n, 2)), 'weights_bar': np.ones((n, 2))}) + msg = rf"weights_bar must have shape \({n}, 1\). weights_bar of shape \({n}, 2\) was passed." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones(n), 'weights_bar': np.ones((n, 2))}) # value checks - with pytest.raises(TypeError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), - weights=1) msg = "All weights values must be greater or equal 0." with pytest.raises(ValueError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), - weights=-1*np.ones_like(dml_data_irm.d)) - msg = f"weights must have shape ({n},) or ({n},2). weights of shape ({n/2},) was passed." + weights=-1*np.ones(n,)) with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones(n/2)) - msg = f"weights must have shape ({n},) or ({n},2). weights of shape ({n},3) was passed." 
+ _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': -1*np.ones(n,), 'weights_bar': np.ones((n, 1))}) with pytest.raises(ValueError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.ones((n, 3))) + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones(n,), 'weights_bar': -1*np.ones((n, 1))}) + msg = "At least one weight must be non-zero." with pytest.raises(ValueError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=np.zeros((dml_data_irm.d.shape[0], ))) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.zeros((dml_data_irm.d.shape[0], )), + 'weights_bar': np.ones((dml_data_irm.d.shape[0], 1))}) + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + weights={'weights': np.ones((dml_data_irm.d.shape[0], )), + 'weights_bar': np.zeros((dml_data_irm.d.shape[0], 1))}) @pytest.mark.ci diff --git a/doubleml/tests/test_doubleml_model_defaults.py b/doubleml/tests/test_doubleml_model_defaults.py index 056ddbbc..ad82e81e 100644 --- a/doubleml/tests/test_doubleml_model_defaults.py +++ b/doubleml/tests/test_doubleml_model_defaults.py @@ -102,7 +102,7 @@ def test_irm_defaults(): assert dml_irm.trimming_rule == 'truncate' assert dml_irm.trimming_threshold == 1e-2 assert not dml_irm.normalize_ipw - assert dml_irm.weights == np.ones((500,3)) + assert np.array_equal(dml_irm.weights, np.ones((500,))) @pytest.mark.ci From cef6a385f982061b080623dcb62eb68f9f0ba3c4 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 6 Dec 2023 09:55:04 +0100 Subject: [PATCH 086/134] update tests and weights --- doubleml/double_ml.py | 2 +- doubleml/double_ml_irm.py | 27 ++++++++++++------- doubleml/tests/_utils_irm_manual.py | 2 +- .../tests/test_doubleml_model_defaults.py | 3 ++- 
doubleml/tests/test_irm.py | 10 ++++--- 5 files changed, 29 insertions(+), 15 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 082c6f3c..8b82aa5e 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -124,7 +124,7 @@ def __init__(self, self.draw_sample_splitting() # initialize arrays according to obj_dml_data and the resampling settings - self._psi, self._psi_deriv, self._psi_elements,\ + self._psi, self._psi_deriv, self._psi_elements, \ self._coef, self._se, self._all_coef, self._all_se, self._all_dml1_coef = self._initialize_arrays() # also initialize bootstrap arrays with the default number of bootstrap replications diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 91dcfcff..ddb57e7f 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -173,7 +173,7 @@ def __init__(self, self._sensitivity_implemented = True _check_weights(weights, score, obj_dml_data.n_obs, self.n_rep) - self._weights, self._weights_bar = self._initialize_weights(weights) + self._initialize_weights(weights) @property def normalize_ipw(self): @@ -201,7 +201,7 @@ def weights(self): """ Specifies the weights for a weighted ATE. 
""" - return np.c_[self._weights, self._weights_bar] + return self._weights def _initialize_ml_nuisance_params(self): valid_learner = ['ml_g0', 'ml_g1', 'ml_m'] @@ -211,10 +211,19 @@ def _initialize_ml_nuisance_params(self): def _initialize_weights(self, weights): if weights is None: weights = np.ones(self._dml_data.n_obs) - if weights.ndim == 1: - return weights, weights + if isinstance(weights, np.ndarray): + self._weights = {'weights': weights} else: - return weights[:, 0], weights[:, 1] + assert isinstance(weights, dict) + self._weights = weights + + def _get_weights(self): + weights = self._weights['weights'] + if 'weights_bar' not in self._weights.keys(): + weights_bar = self._weights['weights'] + else: + weights_bar = self._weights['weights_bar'][:, self._i_rep] + return weights, weights_bar def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): @@ -326,8 +335,9 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): if isinstance(self.score, str): if self.score == 'ATE': - psi_b = self._weights * (g_hat1 - g_hat0) \ - + self._weights_bar * ( + weights, weights_bar = self._get_weights() + psi_b = weights * (g_hat1 - g_hat0) \ + + weights_bar * ( np.divide(np.multiply(d, u_hat1), m_hat) - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat)) psi_a = np.full_like(m_hat, -1.0) @@ -356,8 +366,7 @@ def _sensitivity_element_est(self, preds): # use weights make this extendable if self.score == 'ATE': - weights = self._weights - weights_bar = self._weights_bar + weights, weights_bar = self._get_weights() else: assert self.score == 'ATTE' weights = np.divide(d, np.mean(d)) diff --git a/doubleml/tests/_utils_irm_manual.py b/doubleml/tests/_utils_irm_manual.py index 5f162bee..413ff711 100644 --- a/doubleml/tests/_utils_irm_manual.py +++ b/doubleml/tests/_utils_irm_manual.py @@ -81,7 +81,7 @@ def fit_nuisance_irm(y, x, d, learner_g, learner_m, smpls, score, p_hat_list = [] for (_, test_index) in smpls: - 
p_hat_list.append(np.mean(d[test_index])) + p_hat_list.append(np.mean(d)) return g_hat0_list, g_hat1_list, m_hat_list, p_hat_list diff --git a/doubleml/tests/test_doubleml_model_defaults.py b/doubleml/tests/test_doubleml_model_defaults.py index ad82e81e..7c2d7b38 100644 --- a/doubleml/tests/test_doubleml_model_defaults.py +++ b/doubleml/tests/test_doubleml_model_defaults.py @@ -102,7 +102,8 @@ def test_irm_defaults(): assert dml_irm.trimming_rule == 'truncate' assert dml_irm.trimming_threshold == 1e-2 assert not dml_irm.normalize_ipw - assert np.array_equal(dml_irm.weights, np.ones((500,))) + assert set(dml_irm.weights.keys()) == set(['weights']) + assert np.array_equal(dml_irm.weights['weights'], np.ones((dml_irm._dml_data.n_obs,))) @pytest.mark.ci diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index ac06eb9f..7a08dca3 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -244,6 +244,7 @@ def dml_irm_weights_fixture(n_rep): score='ATE', trimming_threshold=0.05, n_folds=5, + n_rep=n_rep, dml_procedure='dml2', draw_sample_splitting=False) dml_irm_obj_ate_no_weights.set_sample_splitting(smpls) @@ -257,6 +258,7 @@ def dml_irm_weights_fixture(n_rep): score='ATE', trimming_threshold=0.05, n_folds=5, + n_rep=n_rep, dml_procedure='dml2', draw_sample_splitting=False, weights=np.ones_like(obj_dml_data.y)) @@ -272,6 +274,7 @@ def dml_irm_weights_fixture(n_rep): score='ATTE', trimming_threshold=0.05, n_folds=5, + n_rep=n_rep, dml_procedure='dml2', draw_sample_splitting=False) dml_irm_obj_atte_no_weights.set_sample_splitting(smpls) @@ -280,9 +283,9 @@ def dml_irm_weights_fixture(n_rep): m_hat = dml_irm_obj_atte_no_weights.predictions["ml_m"][:, :, 0] p_hat = obj_dml_data.d.mean() - weights = obj_dml_data.d.reshape(-1, 1) / p_hat + weights = obj_dml_data.d / p_hat weights_bar = m_hat / p_hat - combined_weights = np.concatenate((weights, weights_bar), axis=1) + weight_dict = {'weights': weights, 'weights_bar': weights_bar} 
dml_irm_obj_atte_weights = dml.DoubleMLIRM( obj_dml_data, ml_g=clone(ml_g), @@ -290,9 +293,10 @@ def dml_irm_weights_fixture(n_rep): score='ATE', trimming_threshold=0.05, n_folds=5, + n_rep=n_rep, dml_procedure='dml2', draw_sample_splitting=False, - weights=combined_weights) + weights=weight_dict) dml_irm_obj_atte_weights.set_sample_splitting(smpls) np.random.seed(42) dml_irm_obj_atte_weights.fit() From ba72cac5ac7862bcd0c8ec83610b3271d3e47dd2 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 6 Dec 2023 13:51:16 +0100 Subject: [PATCH 087/134] fix unit-test for LPQ external predictions --- doubleml/tests/test_lpq_external_predictions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index db321b37..2a13b4bc 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -27,7 +27,7 @@ def normalize_ipw(request): def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_iivm_data(theta=0.5, n_obs=500, dim_x=20, alpha_x=1.0, return_type="DataFrame") + data = make_iivm_data(theta=0.5, n_obs=2000, dim_x=10, alpha_x=1.0, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d", z_cols="z") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=dml_data.d) From e282e2e7e79f3a5b2574cbb081dae8021995ec83 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 6 Dec 2023 15:31:08 +0100 Subject: [PATCH 088/134] update irm test --- doubleml/tests/test_irm.py | 35 ++++++++++++----------------------- 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 7a08dca3..0c4a7470 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -219,11 +219,18 @@ def n_rep(request): 
@pytest.fixture(scope='module') -def dml_irm_weights_fixture(n_rep): +def dml_irm_weights_fixture(n_rep, dml_procedure): n = 10000 # collect data np.random.seed(42) obj_dml_data = make_irm_data(n_obs=n, dim_x=2) + kwargs = { + "trimming_threshold": 0.05, + "n_folds": 5, + "n_rep": n_rep, + "dml_procedure": dml_procedure, + "draw_sample_splitting": False + } smpls = DoubleMLResampling( n_folds=5, @@ -242,11 +249,7 @@ def dml_irm_weights_fixture(n_rep): ml_g=clone(ml_g), ml_m=clone(ml_m), score='ATE', - trimming_threshold=0.05, - n_folds=5, - n_rep=n_rep, - dml_procedure='dml2', - draw_sample_splitting=False) + **kwargs) dml_irm_obj_ate_no_weights.set_sample_splitting(smpls) np.random.seed(42) dml_irm_obj_ate_no_weights.fit() @@ -256,12 +259,7 @@ def dml_irm_weights_fixture(n_rep): ml_g=clone(ml_g), ml_m=clone(ml_m), score='ATE', - trimming_threshold=0.05, - n_folds=5, - n_rep=n_rep, - dml_procedure='dml2', - draw_sample_splitting=False, - weights=np.ones_like(obj_dml_data.y)) + weights=np.ones_like(obj_dml_data.y), **kwargs) dml_irm_obj_ate_weights.set_sample_splitting(smpls) np.random.seed(42) dml_irm_obj_ate_weights.fit() @@ -272,11 +270,7 @@ def dml_irm_weights_fixture(n_rep): ml_g=clone(ml_g), ml_m=clone(ml_m), score='ATTE', - trimming_threshold=0.05, - n_folds=5, - n_rep=n_rep, - dml_procedure='dml2', - draw_sample_splitting=False) + **kwargs) dml_irm_obj_atte_no_weights.set_sample_splitting(smpls) np.random.seed(42) dml_irm_obj_atte_no_weights.fit() @@ -291,12 +285,7 @@ def dml_irm_weights_fixture(n_rep): ml_g=clone(ml_g), ml_m=clone(ml_m), score='ATE', - trimming_threshold=0.05, - n_folds=5, - n_rep=n_rep, - dml_procedure='dml2', - draw_sample_splitting=False, - weights=weight_dict) + weights=weight_dict, **kwargs) dml_irm_obj_atte_weights.set_sample_splitting(smpls) np.random.seed(42) dml_irm_obj_atte_weights.fit() From 35ee9762af256a5c7ede3c97823fa3851251c8f4 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Wed, 6 Dec 2023 16:44:50 +0100 Subject: 
[PATCH 089/134] update pq model for individual external prediction --- doubleml/double_ml_pq.py | 347 ++++++++++-------- .../tests/test_pq_external_predictions.py | 32 +- 2 files changed, 215 insertions(+), 164 deletions(-) diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index d785429f..546cbe92 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -8,11 +8,26 @@ from .double_ml_score_mixins import NonLinearScoreMixin from .double_ml_data import DoubleMLData -from ._utils import _dml_cv_predict, _trimm, _predict_zero_one_propensity, _get_bracket_guess, \ - _default_kde, _normalize_ipw, _dml_tune, _solve_ipw_score, _cond_targets +from ._utils import ( + _dml_cv_predict, + _trimm, + _predict_zero_one_propensity, + _get_bracket_guess, + _default_kde, + _normalize_ipw, + _dml_tune, + _solve_ipw_score, + _cond_targets, +) from ._utils_resampling import DoubleMLResampling -from ._utils_checks import _check_score, _check_trimming, _check_zero_one_treatment, _check_treatment, \ - _check_contains_iv, _check_quantile +from ._utils_checks import ( + _check_score, + _check_trimming, + _check_zero_one_treatment, + _check_treatment, + _check_contains_iv, + _check_quantile, +) class DoubleMLPQ(NonLinearScoreMixin, DoubleML): @@ -100,29 +115,25 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): d 0.553878 0.149858 3.696011 0.000219 0.260161 0.847595 """ - def __init__(self, - obj_dml_data, - ml_g, - ml_m, - treatment=1, - quantile=0.5, - n_folds=5, - n_rep=1, - score='PQ', - dml_procedure='dml2', - normalize_ipw=True, - kde=None, - trimming_rule='truncate', - trimming_threshold=1e-2, - draw_sample_splitting=True, - apply_cross_fitting=True): - super().__init__(obj_dml_data, - n_folds, - n_rep, - score, - dml_procedure, - draw_sample_splitting, - apply_cross_fitting) + def __init__( + self, + obj_dml_data, + ml_g, + ml_m, + treatment=1, + quantile=0.5, + n_folds=5, + n_rep=1, + score="PQ", + dml_procedure="dml2", + normalize_ipw=True, + kde=None, 
+ trimming_rule="truncate", + trimming_threshold=1e-2, + draw_sample_splitting=True, + apply_cross_fitting=True, + ): + super().__init__(obj_dml_data, n_folds, n_rep, score, dml_procedure, draw_sample_splitting, apply_cross_fitting) self._quantile = quantile self._treatment = treatment @@ -130,21 +141,21 @@ def __init__(self, self._kde = _default_kde else: if not callable(kde): - raise TypeError('kde should be either a callable or None. ' - '%r was passed.' % kde) + raise TypeError("kde should be either a callable or None. " "%r was passed." % kde) self._kde = kde self._normalize_ipw = normalize_ipw self._check_data(self._dml_data) - valid_score = ['PQ'] + valid_score = ["PQ"] _check_score(self.score, valid_score, allow_callable=False) _check_quantile(self.quantile) _check_treatment(self.treatment) if not isinstance(self.normalize_ipw, bool): - raise TypeError('Normalization indicator has to be boolean. ' + - f'Object of type {str(type(self.normalize_ipw))} passed.') + raise TypeError( + "Normalization indicator has to be boolean. " + f"Object of type {str(type(self.normalize_ipw))} passed." 
+ ) # initialize starting values and bounds self._coef_bounds = (self._dml_data.y.min(), self._dml_data.y.max()) @@ -155,19 +166,21 @@ def __init__(self, self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - _ = self._check_learner(ml_g, 'ml_g', regressor=False, classifier=True) - _ = self._check_learner(ml_m, 'ml_m', regressor=False, classifier=True) - self._learner = {'ml_g': ml_g, 'ml_m': ml_m} - self._predict_method = {'ml_g': 'predict_proba', 'ml_m': 'predict_proba'} + _ = self._check_learner(ml_g, "ml_g", regressor=False, classifier=True) + _ = self._check_learner(ml_m, "ml_m", regressor=False, classifier=True) + self._learner = {"ml_g": ml_g, "ml_m": ml_m} + self._predict_method = {"ml_g": "predict_proba", "ml_m": "predict_proba"} self._initialize_ml_nuisance_params() if draw_sample_splitting: - obj_dml_resampling = DoubleMLResampling(n_folds=self.n_folds, - n_rep=self.n_rep, - n_obs=self._dml_data.n_obs, - apply_cross_fitting=self.apply_cross_fitting, - stratify=self._dml_data.d) + obj_dml_resampling = DoubleMLResampling( + n_folds=self.n_folds, + n_rep=self.n_rep, + n_obs=self._dml_data.n_obs, + apply_cross_fitting=self.apply_cross_fitting, + stratify=self._dml_data.d, + ) self._smpls = obj_dml_resampling.split_samples() @property @@ -214,36 +227,36 @@ def trimming_threshold(self): @property def _score_element_names(self): - return ['ind_d', 'g', 'm', 'y'] + return ["ind_d", "g", "m", "y"] def _compute_ipw_score(self, theta, d, y, prop): score = (d == self.treatment) / prop * (y <= theta) - self.quantile return score def _compute_score(self, psi_elements, coef, inds=None): - ind_d = psi_elements['ind_d'] - g = psi_elements['g'] - m = psi_elements['m'] - y = psi_elements['y'] + ind_d = psi_elements["ind_d"] + g = psi_elements["g"] + m = psi_elements["m"] + y = psi_elements["y"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - g = psi_elements['g'][inds] - m = psi_elements['m'][inds] - y = 
psi_elements['y'][inds] + ind_d = psi_elements["ind_d"][inds] + g = psi_elements["g"][inds] + m = psi_elements["m"][inds] + y = psi_elements["y"][inds] score = ind_d * ((y <= coef) - g) / m + g - self.quantile return score def _compute_score_deriv(self, psi_elements, coef, inds=None): - ind_d = psi_elements['ind_d'] - m = psi_elements['m'] - y = psi_elements['y'] + ind_d = psi_elements["ind_d"] + m = psi_elements["m"] + y = psi_elements["y"] if inds is not None: - ind_d = psi_elements['ind_d'][inds] - m = psi_elements['m'][inds] - y = psi_elements['y'][inds] + ind_d = psi_elements["ind_d"][inds] + m = psi_elements["m"][inds] + y = psi_elements["y"][inds] score_weights = ind_d / m @@ -253,74 +266,85 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): return deriv def _initialize_ml_nuisance_params(self): - self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} - for learner in ['ml_g', 'ml_m']} + self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} for learner in ["ml_g", "ml_m"]} def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=False): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) - - g = external_predictions['ml_g'] is not None - m = external_predictions['ml_m'] is not None - + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + + g_external = external_predictions["ml_g"] is not None + m_external = external_predictions["ml_m"] is not None + # initialize nuisance predictions, targets and models - - if not (g and m): - g_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': np.full(shape=self._dml_data.n_obs, fill_value=np.nan) - } - m_hat = copy.deepcopy(g_hat) - ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) - # 
initialize models - fitted_models = {} - for learner in self.params_names: - # set nuisance model parameters + + if not g_external: + g_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + if not m_external: + m_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + ipw_vec = np.full(shape=self.n_folds, fill_value=np.nan) + # initialize models + fitted_models = {} + for learner in self.params_names: + # set nuisance model parameters + if (learner == "ml_g" and not g_external) or (learner == "ml_m" and not m_external): est_params = self._get_params(learner) if est_params is not None: - fitted_models[learner] = [clone(self._learner[learner]).set_params(**est_params[i_fold]) - for i_fold in range(self.n_folds)] + fitted_models[learner] = [ + clone(self._learner[learner]).set_params(**est_params[i_fold]) for i_fold in range(self.n_folds) + ] else: fitted_models[learner] = [clone(self._learner[learner]) for i_fold in range(self.n_folds)] - elif (g and not m) or (m and not g): - raise ValueError('External predictions for both g and m are required.') - else: - g_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': external_predictions['ml_g'] - } - m_hat = {'models': None, - 'targets': np.full(shape=self._dml_data.n_obs, fill_value=np.nan), - 'preds': external_predictions['ml_m'] - } + if g_external: + g_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_g"], + } + if m_external: + m_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": external_predictions["ml_m"], + } # caculate nuisance functions over different folds - if not (g and m): + if not all([g_external, 
m_external]): for i_fold in range(self.n_folds): train_inds = smpls[i_fold][0] test_inds = smpls[i_fold][1] # start nested crossfitting - train_inds_1, train_inds_2 = train_test_split(train_inds, test_size=0.5, - random_state=42, stratify=d[train_inds]) - smpls_prelim = [(train, test) for train, test in - StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1])] + train_inds_1, train_inds_2 = train_test_split( + train_inds, test_size=0.5, random_state=42, stratify=d[train_inds] + ) + smpls_prelim = [ + (train, test) + for train, test in StratifiedKFold(n_splits=self.n_folds).split(X=train_inds_1, y=d[train_inds_1]) + ] d_train_1 = d[train_inds_1] y_train_1 = y[train_inds_1] x_train_1 = x[train_inds_1, :] - # get a copy of ml_m as a preliminary learner - ml_m_prelim = clone(fitted_models['ml_m'][i_fold]) - m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, - method='predict_proba', smpls=smpls_prelim)['preds'] - - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) - + if not m_external: + # get a copy of ml_m as a preliminary learner + ml_m_prelim = clone(fitted_models["ml_m"][i_fold]) + m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)[ + "preds" + ] + else: + m_hat_prelim = m_hat["preds"][np.concatenate([test for train, test in smpls_prelim])] + m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) + m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) if self.treatment == 0: m_hat_prelim = 1 - m_hat_prelim @@ -341,33 +365,34 @@ def ipw_score(theta): dx_treat_train_2 = x_train_2[d_train_2 == self.treatment, :] y_treat_train_2 = y_train_2[d_train_2 == self.treatment] - fitted_models['ml_g'][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) - - # predict nuisance values on the test data and the corresponding targets - 
g_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_g'][i_fold], x[test_inds, :]) - g_hat['targets'][test_inds] = y[test_inds] <= ipw_est + if not g_external: + fitted_models["ml_g"][i_fold].fit(dx_treat_train_2, y_treat_train_2 <= ipw_est) - # refit the propensity score on the whole training set - fitted_models['ml_m'][i_fold].fit(x[train_inds, :], d[train_inds]) - m_hat['preds'][test_inds] = _predict_zero_one_propensity(fitted_models['ml_m'][i_fold], x[test_inds, :]) + # predict nuisance values on the test data and the corresponding targets + g_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_g"][i_fold], x[test_inds, :]) + g_hat["targets"][test_inds] = y[test_inds] <= ipw_est + if not m_external: + # refit the propensity score on the whole training set + fitted_models["ml_m"][i_fold].fit(x[train_inds, :], d[train_inds]) + m_hat["preds"][test_inds] = _predict_zero_one_propensity(fitted_models["ml_m"][i_fold], x[test_inds, :]) # set target for propensity score - m_hat['targets'] = d + m_hat["targets"] = d # set the target for g to be a float and only relevant values - g_hat['targets'] = _cond_targets(g_hat['targets'], cond_sample=(d == self.treatment)) + g_hat["targets"] = _cond_targets(g_hat["targets"], cond_sample=(d == self.treatment)) if return_models: - g_hat['models'] = fitted_models['ml_g'] - m_hat['models'] = fitted_models['ml_m'] + g_hat["models"] = fitted_models["ml_g"] + m_hat["models"] = fitted_models["ml_m"] # clip propensities and normalize ipw weights # this is not done in the score to save computation due to multiple score evaluations # to be able to evaluate the raw models the m_hat['preds'] are not changed - #if not (g and m): - m_hat_adj = _trimm(m_hat['preds'], self.trimming_rule, self.trimming_threshold) + + m_hat_adj = _trimm(m_hat["preds"], self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - if self.dml_procedure == 'dml1': + if self.dml_procedure == "dml1": for _, test_index 
in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat_adj[test_index], d[test_index]) else: @@ -376,65 +401,73 @@ def ipw_score(theta): if self.treatment == 0: m_hat_adj = 1 - m_hat_adj # readjust start value for minimization - if not (g and m): + if not (g_external or m_external): self._coef_start_val = np.mean(ipw_vec) - #else: - # m_hat_adj = m_hat['preds'] - - - psi_elements = {'ind_d': d == self.treatment, 'g': g_hat['preds'], - 'm': m_hat_adj, 'y': y} - - preds = {'predictions': {'ml_g': g_hat['preds'], - 'ml_m': m_hat['preds']}, - 'targets': {'ml_g': g_hat['targets'], - 'ml_m': m_hat['targets']}, - 'models': {'ml_g': g_hat['models'], - 'ml_m': m_hat['models']} - } + + psi_elements = {"ind_d": d == self.treatment, "g": g_hat["preds"], "m": m_hat_adj, "y": y} + + preds = { + "predictions": {"ml_g": g_hat["preds"], "ml_m": m_hat["preds"]}, + "targets": {"ml_g": g_hat["targets"], "ml_m": m_hat["targets"]}, + "models": {"ml_g": g_hat["models"], "ml_m": m_hat["models"]}, + } return psi_elements, preds - def _nuisance_tuning(self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, - search_mode, n_iter_randomized_search): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, - force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, - force_all_finite=False) + def _nuisance_tuning( + self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + ): + x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) if scoring_methods is None: - scoring_methods = {'ml_g': None, - 'ml_m': None} + scoring_methods = {"ml_g": None, "ml_m": None} train_inds = [train_index for (train_index, _) in smpls] train_inds_treat = [np.intersect1d(np.where(d == self.treatment)[0], train) for train, _ in smpls] # use self._coef_start_val as a very crude approximation of ipw_est approx_goal = y <= np.quantile(y[d == self.treatment], 
self.quantile) - g_tune_res = _dml_tune(approx_goal, x, train_inds_treat, - self._learner['ml_g'], param_grids['ml_g'], scoring_methods['ml_g'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) - - m_tune_res = _dml_tune(d, x, train_inds, - self._learner['ml_m'], param_grids['ml_m'], scoring_methods['ml_m'], - n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search) + g_tune_res = _dml_tune( + approx_goal, + x, + train_inds_treat, + self._learner["ml_g"], + param_grids["ml_g"], + scoring_methods["ml_g"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) + + m_tune_res = _dml_tune( + d, + x, + train_inds, + self._learner["ml_m"], + param_grids["ml_m"], + scoring_methods["ml_m"], + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, + ) g_best_params = [xx.best_params_ for xx in g_tune_res] m_best_params = [xx.best_params_ for xx in m_tune_res] - params = {'ml_g': g_best_params, - 'ml_m': m_best_params} - tune_res = {'g_tune': g_tune_res, - 'm_tune': m_tune_res} + params = {"ml_g": g_best_params, "ml_m": m_best_params} + tune_res = {"g_tune": g_tune_res, "m_tune": m_tune_res} - res = {'params': params, - 'tune_res': tune_res} + res = {"params": params, "tune_res": tune_res} return res def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): - raise TypeError('The data must be of DoubleMLData type. ' - f'{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed.') + raise TypeError( + "The data must be of DoubleMLData type. " f"{str(obj_dml_data)} of type {str(type(obj_dml_data))} was passed." 
+ ) _check_contains_iv(obj_dml_data) _check_zero_one_treatment(self) return diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 4468db83..f1c89755 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -23,8 +23,17 @@ def normalize_ipw(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_none(request): + return request.param + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_none(request): + return request.param + + @pytest.fixture(scope="module") -def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): +def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ml_g_none): ext_predictions = {"d": {}} np.random.seed(3141) data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") @@ -41,8 +50,8 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): "draw_sample_splitting": False } - ml_g = LogisticRegression() - ml_m = LogisticRegression() + ml_g = LogisticRegression(random_state=42) + ml_m = LogisticRegression(random_state=42) DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPQ.set_sample_splitting(all_smpls) @@ -50,10 +59,19 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw): DMLPQ.fit(store_predictions=True) - ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] - - DMLPLQ_ext = DoubleMLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + if set_ml_m_none: + ml_m = LogisticRegression(random_state=42) + else: + ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] + ml_m = dummy_classifier() + + if set_ml_g_none: + ml_g = LogisticRegression(random_state=42) + else: + ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] + ml_g = dummy_classifier() + + DMLPLQ_ext = DoubleMLPQ(ml_g = ml_g, 
ml_m = ml_m, **kwargs) DMLPLQ_ext.set_sample_splitting(all_smpls) np.random.seed(3141) From 4e8169b902fb8e9c777a762580b9c482cf1cb0c8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 6 Dec 2023 18:32:57 +0100 Subject: [PATCH 090/134] remove pandas import --- doubleml/_utils_checks.py | 1 - 1 file changed, 1 deletion(-) diff --git a/doubleml/_utils_checks.py b/doubleml/_utils_checks.py index 2d75b48d..39be143d 100644 --- a/doubleml/_utils_checks.py +++ b/doubleml/_utils_checks.py @@ -1,5 +1,4 @@ import numpy as np -import pandas as pd import warnings from sklearn.utils.multiclass import type_of_target From 7075af320d34adac13efa78889397265f863726d Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 12:05:58 +0100 Subject: [PATCH 091/134] update pq model for individual external prediction --- doubleml/double_ml_pq.py | 12 +++++------ .../tests/test_pq_external_predictions.py | 20 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index 546cbe92..ee398f49 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -337,14 +337,14 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa if not m_external: # get a copy of ml_m as a preliminary learner ml_m_prelim = clone(fitted_models["ml_m"][i_fold]) - m_hat_prelim = _dml_cv_predict(ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim)[ - "preds" - ] + m_hat_prelim = _dml_cv_predict( + ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim + )["preds"] else: m_hat_prelim = m_hat["preds"][np.concatenate([test for train, test in smpls_prelim])] - m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) + m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: - m_hat_prelim = _normalize_ipw(m_hat_prelim, 
d_train_1) + m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) if self.treatment == 0: m_hat_prelim = 1 - m_hat_prelim @@ -401,7 +401,7 @@ def ipw_score(theta): if self.treatment == 0: m_hat_adj = 1 - m_hat_adj # readjust start value for minimization - if not (g_external or m_external): + if not g_external or not m_external: self._coef_start_val = np.mean(ipw_vec) psi_elements = {"ind_d": d == self.treatment, "g": g_hat["preds"], "m": m_hat_adj, "y": y} diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index f1c89755..358134c6 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -24,16 +24,16 @@ def normalize_ipw(request): @pytest.fixture(scope="module", params=[True, False]) -def set_ml_m_none(request): +def set_ml_m_ext(request): return request.param @pytest.fixture(scope="module", params=[True, False]) -def set_ml_g_none(request): +def set_ml_g_ext(request): return request.param @pytest.fixture(scope="module") -def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ml_g_none): +def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} np.random.seed(3141) data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") @@ -50,8 +50,8 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ "draw_sample_splitting": False } - ml_g = LogisticRegression(random_state=42) ml_m = LogisticRegression(random_state=42) + ml_g = LogisticRegression(random_state=42) DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPQ.set_sample_splitting(all_smpls) @@ -59,17 +59,17 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_none, set_ DMLPQ.fit(store_predictions=True) - if set_ml_m_none: - ml_m = LogisticRegression(random_state=42) - else: + if set_ml_m_ext: ext_predictions["d"]["ml_m"] = 
DMLPQ.predictions["ml_m"][:, :, 0] ml_m = dummy_classifier() - - if set_ml_g_none: - ml_g = LogisticRegression(random_state=42) else: + ml_m = LogisticRegression(random_state=42) + + if set_ml_g_ext: ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] ml_g = dummy_classifier() + else: + ml_g = LogisticRegression(random_state=42) DMLPLQ_ext = DoubleMLPQ(ml_g = ml_g, ml_m = ml_m, **kwargs) DMLPLQ_ext.set_sample_splitting(all_smpls) From 48329e4dfcd9000442c8b630a7a48935591e053c Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:01:32 +0100 Subject: [PATCH 092/134] update external preds in IRM model --- doubleml/double_ml_irm.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index c8a2d208..78bd3928 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -214,9 +214,13 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) + + g0_external = external_predictions['ml_g0'] is not None + g1_external = external_predictions['ml_g1'] is not None + m_external = external_predictions['ml_m'] is not None # nuisance g - if external_predictions['ml_g0'] is not None: + if g0_external: # use external predictions g_hat0 = {'preds': external_predictions['ml_g0'], 'targets': None, @@ -237,7 +241,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' 'probabilities and not labels are predicted.') - if external_predictions['ml_g1'] is not None: + if g1_external: # use external predictions g_hat1 = {'preds': external_predictions['ml_g1'], 'targets': None, @@ -260,7 +264,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'probabilities and not labels are predicted.') # nuisance m - if external_predictions['ml_m'] is not None: + if m_external: # use external predictions m_hat = {'preds': external_predictions['ml_m'], 'targets': None, From a5bb73b02d5e50fd0d66f0e375036b75ed574d34 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:02:05 +0100 Subject: [PATCH 093/134] add unit test for IRM uncomplete external preds. --- .../tests/test_irm_external_predictions.py | 32 +++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index c1463a07..ee55ce66 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLIRM, DoubleMLData from doubleml.datasets import make_irm_data from doubleml.utils import dummy_regressor, dummy_classifier @@ -22,8 +22,18 @@ def n_rep(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + @pytest.fixture(scope="module") -def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): +def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} x, y, d = 
make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="np.array") @@ -39,11 +49,20 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): DMLIRM.fit(store_predictions=True) - ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + ml_m = dummy_classifier() + else: + ml_m = LogisticRegression(random_state=42) - DMLIRM_ext = DoubleMLIRM(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + if set_ml_g_ext: + ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ml_g = dummy_regressor() + else: + ml_g = LinearRegression() + + DMLIRM_ext = DoubleMLIRM(ml_g=ml_g, ml_m=ml_m, **kwargs) np.random.seed(3141) DMLIRM_ext.fit(external_predictions=ext_predictions) @@ -52,6 +71,7 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep): return res_dict + @pytest.mark.ci def test_doubleml_irm_coef(doubleml_irm_fixture): assert math.isclose(doubleml_irm_fixture["coef_normal"], doubleml_irm_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) From 35d8f33ef60806fd74f2bd2b2bde7bb1523ca267 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:02:16 +0100 Subject: [PATCH 094/134] update external preds in PLR model --- doubleml/double_ml_plr.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 0e4ff31a..4b222c8b 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -171,9 +171,16 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + + m_external = external_predictions['ml_m'] is not None + 
l_external = external_predictions['ml_l'] is not None + if 'ml_g' in self._learner: + g_external = external_predictions['ml_g'] is not None + else: + g_external = False # nuisance l - if external_predictions['ml_l'] is not None: + if l_external: l_hat = {'preds': external_predictions['ml_l'], 'targets': None, 'models': None} @@ -184,7 +191,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa _check_finite_predictions(l_hat['preds'], self._learner['ml_l'], 'ml_l', smpls) # nuisance m - if external_predictions['ml_m'] is not None: + if m_external: m_hat = {'preds': external_predictions['ml_m'], 'targets': None, 'models': None} @@ -213,7 +220,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) # nuisance g - if external_predictions['ml_g'] is not None: + if g_external: g_hat = {'preds': external_predictions['ml_g'], 'targets': None, 'models': None} From 73220b830cf12854737eb94ff58e76c9aca02ac4 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:02:28 +0100 Subject: [PATCH 095/134] add unit test for PLR uncomplete external preds. 
--- .../tests/test_plr_external_predictions.py | 42 +++++++++++++++---- 1 file changed, 35 insertions(+), 7 deletions(-) diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index ca04794f..f1386e11 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from sklearn.linear_model import LinearRegression from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 from doubleml.utils import dummy_regressor @@ -22,8 +22,23 @@ def n_rep(request): return request.param +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_m_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_l_ext(request): + return request.param + + +@pytest.fixture(scope="module", params=[True, False]) +def set_ml_g_ext(request): + return request.param + + @pytest.fixture(scope="module") -def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): +def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l_ext, set_ml_g_ext): ext_predictions = {"d": {}} x, y, d = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, alpha=0.5, return_type="np.array") @@ -42,14 +57,27 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep): DMLPLR.fit(store_predictions=True) - ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] - ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] + if set_ml_m_ext: + ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] + ml_m = dummy_regressor() + else: + ml_m = LinearRegression() - if plr_score == "IV-type": - kwargs["ml_g"] = dummy_regressor() + if set_ml_l_ext: + ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] + ml_l = dummy_regressor() + else: + 
ml_l = LinearRegression() + + if plr_score == "IV-type" and set_ml_g_ext: ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] + kwargs["ml_g"] = dummy_regressor() + elif plr_score == "IV-type" and not set_ml_g_ext: + kwargs["ml_g"] = LinearRegression() + else: + pass - DMLPLR_ext = DoubleMLPLR(ml_m=dummy_regressor(), ml_l=dummy_regressor(), **kwargs) + DMLPLR_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) np.random.seed(3141) DMLPLR_ext.fit(external_predictions=ext_predictions) From 076c49b30e3bfa36707a400c229df0ad63dc65bf Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:22:18 +0100 Subject: [PATCH 096/134] add flags if external predictions are implemented. --- doubleml/double_ml.py | 11 +++++++++-- doubleml/double_ml_did.py | 2 ++ doubleml/double_ml_did_cs.py | 3 +++ doubleml/double_ml_iivm.py | 2 ++ doubleml/double_ml_irm.py | 2 ++ doubleml/double_ml_lpq.py | 2 ++ doubleml/double_ml_pliv.py | 2 ++ doubleml/double_ml_plr.py | 2 ++ doubleml/double_ml_pq.py | 4 +++- 9 files changed, 27 insertions(+), 3 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index c97a6304..2ca29b00 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -63,6 +63,9 @@ def __init__(self, self._sensitivity_implemented = False self._sensitivity_elements = None self._sensitivity_params = None + + # initialize external predictions + self._external_predictions_implemented = False # check resampling specifications if not isinstance(n_folds, int): @@ -530,8 +533,12 @@ def fit(self, n_jobs_cv=None, store_predictions=True, external_predictions=None, raise TypeError('store_models must be True or False. 
' f'Got {str(store_models)}.') - # check prediction format - self._check_external_predictions(external_predictions) + # check if external predictions are implemented + if self._external_predictions_implemented: + # check prediction format + self._check_external_predictions(external_predictions) + elif not self._external_predictions_implemented and external_predictions is not None: + raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") # initialize rmse arrays for nuisance functions evaluation self._initialize_rmses() diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 87c02931..1add5e0d 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -148,6 +148,8 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True + + self._external_predictions_implemented = True @property def in_sample_normalization(self): diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py index f0986eed..2cdbb003 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py @@ -148,6 +148,9 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True + + self._external_predictions_implemented = True + @property def in_sample_normalization(self): diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 1cb793b2..5e3cb073 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -193,6 +193,8 @@ def __init__(self, raise TypeError("subgroups['never_takers'] must be True or False. 
" f'Got {str(subgroups["never_takers"])}.') self.subgroups = subgroups + + self._external_predictions_implemented = True @property def normalize_ipw(self): diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 78bd3928..0c049b66 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -161,6 +161,8 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True + + self._external_predictions_implemented = True @property def normalize_ipw(self): diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index a3255b6f..6efd375f 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -188,6 +188,8 @@ def __init__( stratify=strata, ) self._smpls = obj_dml_resampling.split_samples() + + self._external_predictions_implemented = True @property def quantile(self): diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index e16caa94..b7f6259c 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -145,6 +145,8 @@ def __init__(self, if 'ml_g' in self._learner: self._predict_method['ml_g'] = 'predict' self._initialize_ml_nuisance_params() + + self._external_predictions_implemented = True @classmethod def _partialX(cls, diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index 4b222c8b..c374d6ee 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -150,6 +150,8 @@ def __init__(self, self._initialize_ml_nuisance_params() self._sensitivity_implemented = True + + self._external_predictions_implemented = True def _initialize_ml_nuisance_params(self): self._params = {learner: {key: [None] * self.n_rep for key in self._dml_data.d_cols} diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index ee398f49..e7f42eae 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -182,6 +182,8 @@ def __init__( stratify=self._dml_data.d, ) self._smpls = 
obj_dml_resampling.split_samples() + + self._external_predictions_implemented = True @property def quantile(self): @@ -341,7 +343,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa ml_m_prelim, x_train_1, d_train_1, method="predict_proba", smpls=smpls_prelim )["preds"] else: - m_hat_prelim = m_hat["preds"][np.concatenate([test for train, test in smpls_prelim])] + m_hat_prelim = m_hat["preds"][np.concatenate([test for _, test in smpls_prelim])] m_hat_prelim = _trimm(m_hat_prelim, self.trimming_rule, self.trimming_threshold) if self._normalize_ipw: m_hat_prelim = _normalize_ipw(m_hat_prelim, d_train_1) From f84fc862ffad09971c569d5ee407f3523ea762e8 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 14:22:47 +0100 Subject: [PATCH 097/134] change DGP in PQ external prediction test --- doubleml/tests/test_pq_external_predictions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 358134c6..328e911d 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -36,7 +36,7 @@ def set_ml_g_ext(request): def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type="DataFrame") + data = make_irm_data(theta=0.5, n_obs=1000, dim_x=5, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) @@ -56,7 +56,6 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPQ.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLPQ.fit(store_predictions=True) if set_ml_m_ext: From 5e8f32de64ae151d7b71e3cced7e8a3c9d2a8ae6 Mon Sep 17 00:00:00 2001 From: Jan 
Teichert-Kluge Date: Thu, 7 Dec 2023 16:16:07 +0100 Subject: [PATCH 098/134] add unit test for NotImpl.Error for ext. preds. --- ...leml_external_prediction_implementation.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 doubleml/tests/test_doubleml_external_prediction_implementation.py diff --git a/doubleml/tests/test_doubleml_external_prediction_implementation.py b/doubleml/tests/test_doubleml_external_prediction_implementation.py new file mode 100644 index 00000000..b6ca3cbf --- /dev/null +++ b/doubleml/tests/test_doubleml_external_prediction_implementation.py @@ -0,0 +1,21 @@ +import numpy as np +import pytest +from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLData +from doubleml.datasets import make_irm_data +from doubleml.utils import dummy_regressor, dummy_classifier + +df_irm = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="DataFrame") + +# CVAR +msg = "External predictions not implemented for DoubleMLCVAR." +ext_predictions = {"d": {}} +with pytest.raises(NotImplementedError, match=msg): + cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), dummy_regressor(), dummy_classifier(), treatment=1) + cvar.fit(external_predictions=ext_predictions) + + +# QTE +msg = "External predictions not implemented for DoubleMLQTE." +with pytest.raises(NotImplementedError, match=msg): + qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), dummy_classifier(), dummy_classifier()) + cvar.fit(external_predictions=ext_predictions) From bb9f94fb0df53c6cd3317b05dbfb67ef058aa447 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Thu, 7 Dec 2023 16:43:41 +0100 Subject: [PATCH 099/134] add NotImpl.Error for ext. 
preds in QTE --- doubleml/double_ml_qte.py | 6 ++++- ...leml_external_prediction_implementation.py | 25 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/doubleml/double_ml_qte.py b/doubleml/double_ml_qte.py index c640abf7..8f2286d1 100644 --- a/doubleml/double_ml_qte.py +++ b/doubleml/double_ml_qte.py @@ -386,7 +386,7 @@ def __psi1_deriv(self): def __all_se(self): return self._all_se[self._i_quant, self._i_rep] - def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False): + def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_models=False, external_predictions=None): """ Estimate DoubleMLQTE models. @@ -414,12 +414,16 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ ------- self : object """ + + if external_predictions is not None: + raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") # parallel estimation of the quantiles parallel = Parallel(n_jobs=n_jobs_models, verbose=0, pre_dispatch='2*n_jobs') fitted_models = parallel(delayed(self._fit_quantile)(i_quant, n_jobs_cv, store_predictions, store_models) for i_quant in range(self.n_quantiles)) + # combine the estimates and scores for i_quant in range(self.n_quantiles): self._i_quant = i_quant diff --git a/doubleml/tests/test_doubleml_external_prediction_implementation.py b/doubleml/tests/test_doubleml_external_prediction_implementation.py index b6ca3cbf..9d082859 100644 --- a/doubleml/tests/test_doubleml_external_prediction_implementation.py +++ b/doubleml/tests/test_doubleml_external_prediction_implementation.py @@ -5,17 +5,20 @@ from doubleml.utils import dummy_regressor, dummy_classifier df_irm = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="DataFrame") - -# CVAR -msg = "External predictions not implemented for DoubleMLCVAR." 
ext_predictions = {"d": {}} -with pytest.raises(NotImplementedError, match=msg): - cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), dummy_regressor(), dummy_classifier(), treatment=1) - cvar.fit(external_predictions=ext_predictions) -# QTE -msg = "External predictions not implemented for DoubleMLQTE." -with pytest.raises(NotImplementedError, match=msg): - qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), dummy_classifier(), dummy_classifier()) - cvar.fit(external_predictions=ext_predictions) +@pytest.mark.ci +def test_cvar_external_prediction_exception(): + msg = "External predictions not implemented for DoubleMLCVAR." + with pytest.raises(NotImplementedError, match=msg): + cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), dummy_regressor(), dummy_classifier(), treatment=1) + cvar.fit(external_predictions=ext_predictions) + + +@pytest.mark.ci +def test_qte_external_prediction_exception(): + msg = "External predictions not implemented for DoubleMLQTE." + with pytest.raises(NotImplementedError, match=msg): + qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), dummy_classifier(), dummy_classifier()) + qte.fit(external_predictions=ext_predictions) From 432ccc5dbc26295956b4dba69446fea2cfe714db Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 8 Dec 2023 13:17:38 +0100 Subject: [PATCH 100/134] reformatting --- ... 
=> test_doubleml_exceptions_ext_preds.py} | 3 +- .../tests/test_pliv_external_predictions.py | 5 +-- .../tests/test_pq_external_predictions.py | 29 +++++++++++---- doubleml/utils/dummy_learners.py | 36 +++++++++++++++++++ 4 files changed, 62 insertions(+), 11 deletions(-) rename doubleml/tests/{test_doubleml_external_prediction_implementation.py => test_doubleml_exceptions_ext_preds.py} (90%) diff --git a/doubleml/tests/test_doubleml_external_prediction_implementation.py b/doubleml/tests/test_doubleml_exceptions_ext_preds.py similarity index 90% rename from doubleml/tests/test_doubleml_external_prediction_implementation.py rename to doubleml/tests/test_doubleml_exceptions_ext_preds.py index 9d082859..4be4430f 100644 --- a/doubleml/tests/test_doubleml_external_prediction_implementation.py +++ b/doubleml/tests/test_doubleml_exceptions_ext_preds.py @@ -1,10 +1,9 @@ -import numpy as np import pytest from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLData from doubleml.datasets import make_irm_data from doubleml.utils import dummy_regressor, dummy_classifier -df_irm = make_irm_data(n_obs=500, dim_x=20, theta=0.5, return_type="DataFrame") +df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame") ext_predictions = {"d": {}} diff --git a/doubleml/tests/test_pliv_external_predictions.py b/doubleml/tests/test_pliv_external_predictions.py index cbd13dfe..b9061498 100644 --- a/doubleml/tests/test_pliv_external_predictions.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV +from sklearn.linear_model import LinearRegression from doubleml import DoubleMLPLIV, DoubleMLData from doubleml.datasets import make_pliv_CHS2015 from doubleml.utils import dummy_regressor @@ -32,6 +32,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): # IV-type score only allows dim_z = 1, so skip testcases with dim_z > 1 for IV-type 
score if dim_z > 1 and score == "IV-type": pytest.skip("IV-type score only allows dim_z = 1") + res_dict = None else: ext_predictions = {"d": {}} @@ -86,7 +87,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): res_dict = {"coef_normal": DMLPLIV.coef, "coef_ext": DMLPLIV_ext.coef} - return res_dict + return res_dict @pytest.mark.ci diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 328e911d..0f3c0bc7 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLPQ, DoubleMLData from doubleml.datasets import make_irm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import dummy_classifier from ._utils import draw_smpls @@ -27,6 +27,7 @@ def normalize_ipw(request): def set_ml_m_ext(request): return request.param + @pytest.fixture(scope="module", params=[True, False]) def set_ml_g_ext(request): return request.param @@ -36,7 +37,7 @@ def set_ml_g_ext(request): def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_ml_g_ext): ext_predictions = {"d": {}} np.random.seed(3141) - data = make_irm_data(theta=0.5, n_obs=1000, dim_x=5, return_type="DataFrame") + data = make_irm_data(theta=1, n_obs=500, dim_x=5, return_type="DataFrame") dml_data = DoubleMLData(data, "y", "d") all_smpls = draw_smpls(len(dml_data.y), 5, n_rep=n_rep, groups=None) @@ -47,7 +48,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m "n_rep": n_rep, "dml_procedure": dml_procedure, "normalize_ipw": normalize_ipw, - "draw_sample_splitting": False + "draw_sample_splitting": False, } ml_m = LogisticRegression(random_state=42) @@ -63,24 +64,38 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m ml_m = dummy_classifier() else: ml_m = 
LogisticRegression(random_state=42) - + if set_ml_g_ext: ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] ml_g = dummy_classifier() else: ml_g = LogisticRegression(random_state=42) - DMLPLQ_ext = DoubleMLPQ(ml_g = ml_g, ml_m = ml_m, **kwargs) + DMLPLQ_ext = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) DMLPLQ_ext.set_sample_splitting(all_smpls) np.random.seed(3141) DMLPLQ_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef} + if set_ml_m_ext and not set_ml_g_ext: + # adjust tolerance for the case that ml_m is set to external predictions + # because no preliminary results are available for ml_m, the model use the (external) final predictions for ml_m + tol_rel = 0.1 + tol_abs = 0.1 + else: + tol_rel = 1e-9 + tol_abs = 1e-4 + + res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} return res_dict @pytest.mark.ci def test_doubleml_pq_coef(doubleml_pq_fixture): - assert math.isclose(doubleml_pq_fixture["coef_normal"], doubleml_pq_fixture["coef_ext"], rel_tol=1e-9, abs_tol=1e-4) + assert math.isclose( + doubleml_pq_fixture["coef_normal"], + doubleml_pq_fixture["coef_ext"], + rel_tol=doubleml_pq_fixture["tol_rel"], + abs_tol=doubleml_pq_fixture["tol_abs"], + ) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 2f893fb2..4d771b20 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -2,6 +2,23 @@ class dummy_regressor(BaseEstimator): + """ + A dummy regressor that raises an AttributeError when attempting to access + its fit, predict, or set_params methods. + Attributes + ---------- + _estimator_type : str + Type of the estimator, set to "regressor". + Methods + ------- + fit(*args) + Raises AttributeError: "Accessed fit method of DummyRegressor!" + predict(*args) + Raises AttributeError: "Accessed predict method of DummyRegressor!" 
+ set_params(*args) + Raises AttributeError: "Accessed set_params method of DummyRegressor!" + """ + _estimator_type = "regressor" def fit(*args): @@ -15,6 +32,25 @@ def set_params(*args): class dummy_classifier(BaseEstimator): + """ + A dummy classifier that raises an AttributeError when attempting to access + its fit, predict, set_params, or predict_proba methods. + Attributes + ---------- + _estimator_type : str + Type of the estimator, set to "classifier". + Methods + ------- + fit(*args) + Raises AttributeError: "Accessed fit method of DummyClassifier!" + predict(*args) + Raises AttributeError: "Accessed predict method of DummyClassifier!" + set_params(*args) + Raises AttributeError: "Accessed set_params method of DummyClassifier!" + predict_proba(*args, **kwargs) + Raises AttributeError: "Accessed predict_proba method of DummyClassifier!" + """ + _estimator_type = "classifier" def fit(*args): From 4009c47543dc0c158fd53783ff3ed02e4b7f446a Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 8 Dec 2023 13:21:27 +0100 Subject: [PATCH 101/134] Fix Typo in try except statement --- doubleml/tests/test_dummy_learners.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/tests/test_dummy_learners.py b/doubleml/tests/test_dummy_learners.py index ee3d979a..a357345c 100644 --- a/doubleml/tests/test_dummy_learners.py +++ b/doubleml/tests/test_dummy_learners.py @@ -42,5 +42,5 @@ def test_clone(dl_fixture): try: _ = clone(dl_fixture["dummy_regressor"]) _ = clone(dl_fixture["dummy_classifier"]) - except Error as e: + except Exception as e: pytest.fail(f"clone() raised an exception:\n{str(e)}\n") From fae1d17b8387556f1cf406b0f526f5f80e818713 Mon Sep 17 00:00:00 2001 From: Jan Teichert-Kluge Date: Fri, 8 Dec 2023 13:44:14 +0100 Subject: [PATCH 102/134] Format to PEP8 standards --- doubleml/double_ml.py | 4 +- doubleml/double_ml_did.py | 14 +++--- doubleml/double_ml_did_cs.py | 20 ++++---- doubleml/double_ml_iivm.py | 17 ++++--- 
doubleml/double_ml_irm.py | 6 +-- doubleml/double_ml_lpq.py | 4 +- doubleml/double_ml_pliv.py | 26 +++++----- doubleml/double_ml_plr.py | 6 +-- doubleml/double_ml_pq.py | 47 ++++++++++--------- doubleml/double_ml_qte.py | 5 +- .../tests/test_did_external_predictions.py | 5 +- .../tests/test_didcs_external_predictions.py | 4 +- .../tests/test_iivm_external_predictions.py | 5 +- .../tests/test_lpq_external_predictions.py | 2 +- doubleml/tests/test_plr.py | 4 +- doubleml/tests/test_plr_rep_cross.py | 4 +- 16 files changed, 83 insertions(+), 90 deletions(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 2ca29b00..dd3547ad 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -63,7 +63,7 @@ def __init__(self, self._sensitivity_implemented = False self._sensitivity_elements = None self._sensitivity_params = None - + # initialize external predictions self._external_predictions_implemented = False @@ -127,7 +127,7 @@ def __init__(self, self.draw_sample_splitting() # initialize arrays according to obj_dml_data and the resampling settings - self._psi, self._psi_deriv, self._psi_elements,\ + self._psi, self._psi_deriv, self._psi_elements, \ self._coef, self._se, self._all_coef, self._all_se, self._all_dml1_coef = self._initialize_arrays() # also initialize bootstrap arrays with the default number of bootstrap replications diff --git a/doubleml/double_ml_did.py b/doubleml/double_ml_did.py index 1add5e0d..77bface4 100644 --- a/doubleml/double_ml_did.py +++ b/doubleml/double_ml_did.py @@ -146,9 +146,7 @@ def __init__(self, self._trimming_rule = trimming_rule self._trimming_threshold = trimming_threshold _check_trimming(self._trimming_rule, self._trimming_threshold) - self._sensitivity_implemented = True - self._external_predictions_implemented = True @property @@ -213,8 +211,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d0, 
n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples @@ -228,8 +226,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples @@ -242,8 +240,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance m if external_predictions['ml_m'] is not None: m_hat = {'preds': external_predictions['ml_m'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: m_hat = _dml_cv_predict(self._learner['ml_m'], x, d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], diff --git a/doubleml/double_ml_did_cs.py b/doubleml/double_ml_did_cs.py index 2cdbb003..55b5e32e 100644 --- a/doubleml/double_ml_did_cs.py +++ b/doubleml/double_ml_did_cs.py @@ -148,9 +148,7 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True - self._external_predictions_implemented = True - @property def in_sample_normalization(self): @@ -237,9 +235,9 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d0_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t0, 
n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], - return_models=return_models) - + est_params=self._get_params('ml_g_d0_t0'), method=self._predict_method['ml_g'], + return_models=return_models) + g_hat_d0_t0['targets'] = g_hat_d0_t0['targets'].astype(float) g_hat_d0_t0['targets'][np.invert((d == 0) & (t == 0))] = np.nan if external_predictions['ml_g_d0_t1'] is not None: @@ -248,8 +246,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d0_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d0_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g_d0_t1'), method=self._predict_method['ml_g'], + return_models=return_models) g_hat_d0_t1['targets'] = g_hat_d0_t1['targets'].astype(float) g_hat_d0_t1['targets'][np.invert((d == 0) & (t == 1))] = np.nan if external_predictions['ml_g_d1_t0'] is not None: @@ -258,8 +256,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d1_t0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g_d1_t0'), method=self._predict_method['ml_g'], + return_models=return_models) g_hat_d1_t0['targets'] = g_hat_d1_t0['targets'].astype(float) g_hat_d1_t0['targets'][np.invert((d == 1) & (t == 0))] = np.nan if external_predictions['ml_g_d1_t1'] is not None: @@ -268,8 +266,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat_d1_t1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls_d1_t1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], - return_models=return_models) + 
est_params=self._get_params('ml_g_d1_t1'), method=self._predict_method['ml_g'], + return_models=return_models) g_hat_d1_t1['targets'] = g_hat_d1_t1['targets'].astype(float) g_hat_d1_t1['targets'][np.invert((d == 1) & (t == 1))] = np.nan diff --git a/doubleml/double_ml_iivm.py b/doubleml/double_ml_iivm.py index 5e3cb073..d981250e 100644 --- a/doubleml/double_ml_iivm.py +++ b/doubleml/double_ml_iivm.py @@ -193,7 +193,6 @@ def __init__(self, raise TypeError("subgroups['never_takers'] must be True or False. " f'Got {str(subgroups["never_takers"])}.') self.subgroups = subgroups - self._external_predictions_implemented = True @property @@ -266,8 +265,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat0 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g0'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat0['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples g_hat0['targets'] = g_hat0['targets'].astype(float) @@ -289,8 +288,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples g_hat1['targets'] = g_hat1['targets'].astype(float) @@ -328,8 +327,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} 
else: r_hat0 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z0, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], - return_models=return_models) + est_params=self._get_params('ml_r0'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat0 = {'preds': np.zeros_like(d), 'targets': np.zeros_like(d), 'models': None} if not r0: @@ -346,8 +345,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: r_hat1 = _dml_cv_predict(self._learner['ml_r'], x, d, smpls=smpls_z1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], - return_models=return_models) + est_params=self._get_params('ml_r1'), method=self._predict_method['ml_r'], + return_models=return_models) else: r_hat1 = {'preds': np.ones_like(d), 'targets': np.ones_like(d), 'models': None} if not r1: diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 0c049b66..2df99cd4 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -161,7 +161,6 @@ def __init__(self, _check_trimming(self._trimming_rule, self._trimming_threshold) self._sensitivity_implemented = True - self._external_predictions_implemented = True @property @@ -216,7 +215,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) # get train indices for d == 0 and d == 1 smpls_d0, smpls_d1 = _get_cond_smpls(smpls, d) - g0_external = external_predictions['ml_g0'] is not None g1_external = external_predictions['ml_g1'] is not None m_external = external_predictions['ml_m'] is not None @@ -250,8 +248,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa 'models': None} else: g_hat1 = _dml_cv_predict(self._learner['ml_g'], x, y, smpls=smpls_d1, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], - return_models=return_models) + 
est_params=self._get_params('ml_g1'), method=self._predict_method['ml_g'], + return_models=return_models) _check_finite_predictions(g_hat1['preds'], self._learner['ml_g'], 'ml_g', smpls) # adjust target values to consider only compatible subsamples g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 6efd375f..d2d06ac2 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -188,7 +188,6 @@ def __init__( stratify=strata, ) self._smpls = obj_dml_resampling.split_samples() - self._external_predictions_implemented = True @property @@ -385,7 +384,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # preliminary propensity for z ml_m_z_prelim = clone(fitted_models["ml_m_z"][i_fold]) - m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, method="predict_proba", smpls=smpls_prelim)[ + m_z_hat_prelim = _dml_cv_predict(ml_m_z_prelim, x_train_1, z_train_1, + method="predict_proba", smpls=smpls_prelim)[ "preds" ] diff --git a/doubleml/double_ml_pliv.py b/doubleml/double_ml_pliv.py index b7f6259c..6725e925 100644 --- a/doubleml/double_ml_pliv.py +++ b/doubleml/double_ml_pliv.py @@ -145,7 +145,6 @@ def __init__(self, if 'ml_g' in self._learner: self._predict_method['ml_g'] = 'predict' self._initialize_ml_nuisance_params() - self._external_predictions_implemented = True @classmethod @@ -320,8 +319,8 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return # nuisance l if external_predictions['ml_l'] is not None: l_hat = {'preds': external_predictions['ml_l'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], @@ -335,11 +334,11 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, 
return if self._dml_data.n_instr == 1: # one instrument: just identified x, z = check_X_y(x, np.ravel(self._dml_data.z), - force_all_finite=False) + force_all_finite=False) if external_predictions['ml_m'] is not None: m_hat = {'preds': external_predictions['ml_m'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: m_hat = _dml_cv_predict(self._learner['ml_m'], x, z, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_m'), method=self._predict_method['ml_m'], @@ -355,16 +354,17 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return for i_instr in range(self._dml_data.n_instr): z = self._dml_data.z x, this_z = check_X_y(x, z[:, i_instr], - force_all_finite=False) + force_all_finite=False) if external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] is not None: m_hat['preds'][:, i_instr] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] - predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions['ml_m_' + self._dml_data.z_cols[i_instr]] + predictions['ml_m_' + self._dml_data.z_cols[i_instr]] = external_predictions[ + 'ml_m_' + self._dml_data.z_cols[i_instr]] targets['ml_m_' + self._dml_data.z_cols[i_instr]] = None models['ml_m_' + self._dml_data.z_cols[i_instr]] = None else: res_cv_predict = _dml_cv_predict(self._learner['ml_m'], x, this_z, smpls=smpls, n_jobs=n_jobs_cv, - est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), - method=self._predict_method['ml_m'], return_models=return_models) + est_params=self._get_params('ml_m_' + self._dml_data.z_cols[i_instr]), + method=self._predict_method['ml_m'], return_models=return_models) m_hat['preds'][:, i_instr] = res_cv_predict['preds'] @@ -394,8 +394,8 @@ def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return # get an initial estimate for theta using the partialling out score if external_predictions['ml_g'] is not None: g_hat = {'preds': external_predictions['ml_g'], - 
'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: psi_a = -np.multiply(d - r_hat['preds'], z - m_hat['preds']) psi_b = np.multiply(z - m_hat['preds'], y - l_hat['preds']) @@ -425,7 +425,7 @@ def _score_elements(self, y, z, d, l_hat, m_hat, r_hat, g_hat, smpls): # compute residuals u_hat = y - l_hat w_hat = d - r_hat - v_hat = z- m_hat + v_hat = z - m_hat r_hat_tilde = None if self._dml_data.n_instr > 1: diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index c374d6ee..41e7df2f 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -150,7 +150,6 @@ def __init__(self, self._initialize_ml_nuisance_params() self._sensitivity_implemented = True - self._external_predictions_implemented = True def _initialize_ml_nuisance_params(self): @@ -173,7 +172,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa force_all_finite=False) x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) - m_external = external_predictions['ml_m'] is not None l_external = external_predictions['ml_l'] is not None if 'ml_g' in self._learner: @@ -224,8 +222,8 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # nuisance g if g_external: g_hat = {'preds': external_predictions['ml_g'], - 'targets': None, - 'models': None} + 'targets': None, + 'models': None} else: g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], diff --git a/doubleml/double_ml_pq.py b/doubleml/double_ml_pq.py index e7f42eae..dee3dc12 100644 --- a/doubleml/double_ml_pq.py +++ b/doubleml/double_ml_pq.py @@ -1,5 +1,4 @@ import numpy as np -import copy from sklearn.base import clone from sklearn.utils import check_X_y from sklearn.model_selection import StratifiedKFold, train_test_split @@ -115,25 +114,29 @@ class DoubleMLPQ(NonLinearScoreMixin, DoubleML): d 0.553878 0.149858 
3.696011 0.000219 0.260161 0.847595 """ - def __init__( - self, - obj_dml_data, - ml_g, - ml_m, - treatment=1, - quantile=0.5, - n_folds=5, - n_rep=1, - score="PQ", - dml_procedure="dml2", - normalize_ipw=True, - kde=None, - trimming_rule="truncate", - trimming_threshold=1e-2, - draw_sample_splitting=True, - apply_cross_fitting=True, - ): - super().__init__(obj_dml_data, n_folds, n_rep, score, dml_procedure, draw_sample_splitting, apply_cross_fitting) + def __init__(self, + obj_dml_data, + ml_g, + ml_m, + treatment=1, + quantile=0.5, + n_folds=5, + n_rep=1, + score='PQ', + dml_procedure='dml2', + normalize_ipw=True, + kde=None, + trimming_rule='truncate', + trimming_threshold=1e-2, + draw_sample_splitting=True, + apply_cross_fitting=True): + super().__init__(obj_dml_data, + n_folds, + n_rep, + score, + dml_procedure, + draw_sample_splitting, + apply_cross_fitting) self._quantile = quantile self._treatment = treatment @@ -141,7 +144,8 @@ def __init__( self._kde = _default_kde else: if not callable(kde): - raise TypeError("kde should be either a callable or None. " "%r was passed." % kde) + raise TypeError("kde should be either a callable or None. " + "%r was passed." 
% kde) self._kde = kde self._normalize_ipw = normalize_ipw @@ -182,7 +186,6 @@ def __init__( stratify=self._dml_data.d, ) self._smpls = obj_dml_resampling.split_samples() - self._external_predictions_implemented = True @property diff --git a/doubleml/double_ml_qte.py b/doubleml/double_ml_qte.py index 8f2286d1..9633434f 100644 --- a/doubleml/double_ml_qte.py +++ b/doubleml/double_ml_qte.py @@ -161,7 +161,7 @@ def __init__(self, self._modellist_0, self._modellist_1 = self._initialize_models() # initialize arrays according to obj_dml_data and the resampling settings - self._psi0, self._psi1, self._psi0_deriv, self._psi1_deriv,\ + self._psi0, self._psi1, self._psi0_deriv, self._psi1_deriv, \ self._coef, self._se, self._all_coef, self._all_se, self._all_dml1_coef = self._initialize_arrays() # also initialize bootstrap arrays with the default number of bootstrap replications @@ -414,7 +414,7 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ ------- self : object """ - + if external_predictions is not None: raise NotImplementedError(f"External predictions not implemented for {self.__class__.__name__}.") @@ -423,7 +423,6 @@ def fit(self, n_jobs_models=None, n_jobs_cv=None, store_predictions=True, store_ fitted_models = parallel(delayed(self._fit_quantile)(i_quant, n_jobs_cv, store_predictions, store_models) for i_quant in range(self.n_quantiles)) - # combine the estimates and scores for i_quant in range(self.n_quantiles): self._i_quant = i_quant diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index 12d7e3c9..0d92c9e8 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -1,12 +1,13 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDID +from sklearn.linear_model import LinearRegression, 
LogisticRegression +from doubleml import DoubleMLDID from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier from ._utils import draw_smpls + @pytest.fixture(scope="module", params=["observational", "experimental"]) def did_score(request): return request.param diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 0eed900a..1498e571 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -1,8 +1,8 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression -from doubleml import DoubleMLData, DoubleMLDIDCS +from sklearn.linear_model import LinearRegression, LogisticRegression +from doubleml import DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 from doubleml.utils import dummy_regressor, dummy_classifier from ._utils import draw_smpls diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py index 40bb02db..cce62032 100644 --- a/doubleml/tests/test_iivm_external_predictions.py +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -1,7 +1,7 @@ import numpy as np import pytest import math -from sklearn.linear_model import LinearRegression, LassoCV, LogisticRegression +from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLIIVM, DoubleMLData from doubleml.datasets import make_iivm_data from doubleml.utils import dummy_regressor, dummy_classifier @@ -45,14 +45,13 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): np.random.seed(3141) DMLIIVM.fit(store_predictions=True) - + ext_predictions["d"]["ml_g0"] = DMLIIVM.predictions["ml_g0"][:, :, 0] ext_predictions["d"]["ml_g1"] = DMLIIVM.predictions["ml_g1"][:, :, 0] ext_predictions["d"]["ml_m"] = DMLIIVM.predictions["ml_m"][:, :, 0] ext_predictions["d"]["ml_r0"] 
= DMLIIVM.predictions["ml_r0"][:, :, 0] ext_predictions["d"]["ml_r1"] = DMLIIVM.predictions["ml_r1"][:, :, 0] - DMLIIVM_ext = DoubleMLIIVM( ml_g=dummy_regressor(), ml_m=dummy_classifier(), ml_r=dummy_classifier(), **kwargs ) diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index 2a13b4bc..a5a9a5bb 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLLPQ, DoubleMLData from doubleml.datasets import make_iivm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import dummy_classifier from ._utils import draw_smpls diff --git a/doubleml/tests/test_plr.py b/doubleml/tests/test_plr.py index b17d6802..c3df97c4 100644 --- a/doubleml/tests/test_plr.py +++ b/doubleml/tests/test_plr.py @@ -76,7 +76,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): res_manual = fit_plr(y, x, d, clone(learner), clone(learner), clone(learner), all_smpls, dml_procedure, score) - + np.random.seed(3141) # test with external nuisance predictions if score == 'partialling out': @@ -104,7 +104,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure): prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, 1), 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, 1), 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, 1)}} - + dml_plr_obj_ext.fit(external_predictions=prediction_dict) diff --git a/doubleml/tests/test_plr_rep_cross.py b/doubleml/tests/test_plr_rep_cross.py index 4f95f10a..9bbc2616 100644 --- a/doubleml/tests/test_plr_rep_cross.py +++ b/doubleml/tests/test_plr_rep_cross.py @@ -74,7 +74,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep): res_manual = fit_plr(y, x, d, _clone(learner), _clone(learner), _clone(learner), all_smpls, dml_procedure, score, 
n_rep) - + np.random.seed(3141) # test with external nuisance predictions if score == 'partialling out': @@ -104,7 +104,7 @@ def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep): prediction_dict = {'d': {'ml_l': dml_plr_obj.predictions['ml_l'].reshape(-1, n_rep), 'ml_m': dml_plr_obj.predictions['ml_m'].reshape(-1, n_rep), 'ml_g': dml_plr_obj.predictions['ml_g'].reshape(-1, n_rep)}} - + dml_plr_obj_ext.fit(external_predictions=prediction_dict) res_dict = {'coef': dml_plr_obj.coef, From 1ef67ab91aab3661ad95994868767ca2cfe18f4d Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 06:58:42 +0100 Subject: [PATCH 103/134] update irm tor remove deepcopy --- doubleml/double_ml_irm.py | 5 +++-- doubleml/tests/_utils_irm_manual.py | 12 ++++++++---- doubleml/tests/test_irm.py | 12 ++++++------ doubleml/tests/test_irm_external_predictions.py | 2 +- 4 files changed, 18 insertions(+), 13 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 2df99cd4..9149462f 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -1,7 +1,6 @@ import numpy as np import pandas as pd import warnings -import copy from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target @@ -304,13 +303,15 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): for _, test_index in smpls: p_hat[test_index] = np.mean(d[test_index]) - m_hat_adj = copy.deepcopy(m_hat) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if self.normalize_ipw: if self.dml_procedure == 'dml1': for _, test_index in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat # compute residuals u_hat0 = y - g_hat0 diff --git a/doubleml/tests/_utils_irm_manual.py b/doubleml/tests/_utils_irm_manual.py index 60c09db8..c46fe605 100644 --- a/doubleml/tests/_utils_irm_manual.py 
+++ b/doubleml/tests/_utils_irm_manual.py @@ -1,5 +1,4 @@ import numpy as np -import copy from sklearn.base import clone, is_classifier from ._utils_boot import boot_manual, draw_weights @@ -131,10 +130,12 @@ def irm_dml1(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) - m_hat_adj = copy.deepcopy(m_hat) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if normalize_ipw: for _, test_index in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) + else: + m_hat_adj = m_hat for idx, (_, test_index) in enumerate(smpls): thetas[idx] = irm_orth(g_hat0[test_index], g_hat1[test_index], @@ -165,9 +166,10 @@ def irm_dml2(y, x, d, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls, s u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) - m_hat_adj = copy.deepcopy(m_hat) if normalize_ipw: m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat theta_hat = irm_orth(g_hat0, g_hat1, m_hat_adj, p_hat, u_hat0, u_hat1, d, score) @@ -243,13 +245,15 @@ def boot_irm_single_split(theta, y, d, g_hat0_list, g_hat1_list, m_hat_list, p_h u_hat0, u_hat1, g_hat0, g_hat1, m_hat, p_hat = compute_iivm_residuals( y, g_hat0_list, g_hat1_list, m_hat_list, p_hat_list, smpls) - m_hat_adj = copy.deepcopy(m_hat) + m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if normalize_ipw: if dml_procedure == 'dml1': for _, test_index in smpls: m_hat_adj[test_index] = _normalize_ipw(m_hat[test_index], d[test_index]) else: m_hat_adj = _normalize_ipw(m_hat, d) + else: + m_hat_adj = m_hat if apply_cross_fitting: if score == 'ATE': diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 7c834754..73dd5cca 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -150,21 +150,21 @@ def 
dml_irm_fixture(generate_data_irm, learner, score, dml_procedure, normalize_ @pytest.mark.ci def test_dml_irm_coef(dml_irm_fixture): - assert math.isclose(dml_irm_fixture['coef'], + assert math.isclose(dml_irm_fixture['coef'][0], dml_irm_fixture['coef_manual'], rel_tol=1e-9, abs_tol=1e-4) - assert math.isclose(dml_irm_fixture['coef'], - dml_irm_fixture['coef_ext'], + assert math.isclose(dml_irm_fixture['coef'][0], + dml_irm_fixture['coef_ext'][0], rel_tol=1e-9, abs_tol=1e-4) @pytest.mark.ci def test_dml_irm_se(dml_irm_fixture): - assert math.isclose(dml_irm_fixture['se'], + assert math.isclose(dml_irm_fixture['se'][0], dml_irm_fixture['se_manual'], rel_tol=1e-9, abs_tol=1e-4) - assert math.isclose(dml_irm_fixture['se'], - dml_irm_fixture['se_ext'], + assert math.isclose(dml_irm_fixture['se'][0], + dml_irm_fixture['se_ext'][0], rel_tol=1e-9, abs_tol=1e-4) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index ee55ce66..d291af29 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -67,7 +67,7 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g np.random.seed(3141) DMLIRM_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLIRM.coef, "coef_ext": DMLIRM_ext.coef} + res_dict = {"coef_normal": DMLIRM.coef[0], "coef_ext": DMLIRM_ext.coef[0]} return res_dict From 022976d51a5a4cdd7d489b3e2600ccab823bf0fd Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 07:12:07 +0100 Subject: [PATCH 104/134] remove deepcopy from lpq --- doubleml/double_ml_lpq.py | 25 ++++++++++++++++++++----- doubleml/tests/test_lpq.py | 4 ++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index d2d06ac2..2b7260eb 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ 
-1,5 +1,4 @@ import numpy as np -import copy from sklearn.utils.multiclass import type_of_target from sklearn.base import clone from sklearn.utils import check_X_y @@ -316,10 +315,26 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), } - m_d_z0_hat = copy.deepcopy(m_z_hat) - m_d_z1_hat = copy.deepcopy(m_z_hat) - g_du_z0_hat = copy.deepcopy(m_z_hat) - g_du_z1_hat = copy.deepcopy(m_z_hat) + m_d_z0_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + m_d_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + g_du_z0_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } + g_du_z1_hat = { + "models": None, + "targets": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + "preds": np.full(shape=self._dml_data.n_obs, fill_value=np.nan), + } # initialize models fitted_models = {} diff --git a/doubleml/tests/test_lpq.py b/doubleml/tests/test_lpq.py index e7550e06..beb8b6a0 100644 --- a/doubleml/tests/test_lpq.py +++ b/doubleml/tests/test_lpq.py @@ -119,9 +119,9 @@ def dml_lpq_fixture(generate_data_local_quantiles, treatment, quantile, learner, normalize_ipw=normalize_ipw, kde=kde, n_rep=1, trimming_threshold=trimming_threshold) - res_dict = {'coef': dml_lpq_obj.coef, + res_dict = {'coef': dml_lpq_obj.coef[0], 'coef_manual': res_manual['lpq'], - 'se': dml_lpq_obj.se, + 'se': dml_lpq_obj.se[0], 'se_manual': res_manual['se']} return res_dict From 3044f5c557970aa0f6783d9c531c83e5443ddc38 Mon Sep 17 00:00:00 2001 From: Sven Klaassen 
<47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 07:59:57 +0100 Subject: [PATCH 105/134] renaming external prediction tests --- .../tests/test_did_external_predictions.py | 20 ++++++++-------- .../tests/test_didcs_external_predictions.py | 24 +++++++++---------- .../tests/test_iivm_external_predictions.py | 20 ++++++++-------- .../tests/test_irm_external_predictions.py | 16 ++++++------- .../tests/test_lpq_external_predictions.py | 24 +++++++++---------- .../tests/test_pliv_external_predictions.py | 20 ++++++++-------- .../tests/test_plr_external_predictions.py | 16 ++++++------- .../tests/test_pq_external_predictions.py | 18 +++++++------- 8 files changed, 79 insertions(+), 79 deletions(-) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index 0d92c9e8..59a1e6f1 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -35,21 +35,21 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): "dml_procedure": dml_procedure, "draw_sample_splitting": False } - DMLDID = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - DMLDID.set_sample_splitting(all_smpls) + dml_did = DoubleMLDID(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_did.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLDID.fit(store_predictions=True) + dml_did.fit(store_predictions=True) - ext_predictions["d"]["ml_g0"] = DMLDID.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLDID.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLDID.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_g0"] = dml_did.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_did.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] - DMLDID_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - 
DMLDID_ext.set_sample_splitting(all_smpls) + dml_did_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + dml_did_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLDID_ext.fit(external_predictions=ext_predictions) + dml_did_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLDID.coef, "coef_ext": DMLDID_ext.coef} + res_dict = {"coef_normal": dml_did.coef, "coef_ext": dml_did_ext.coef} return res_dict diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 1498e571..90aafaef 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -36,23 +36,23 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): "dml_procedure": dml_procedure, "draw_sample_splitting": False } - DMLDIDCS = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) - DMLDIDCS.set_sample_splitting(all_smpls) + dml_did_cs = DoubleMLDIDCS(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_did_cs.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLDIDCS.fit(store_predictions=True) + dml_did_cs.fit(store_predictions=True) - ext_predictions["d"]["ml_g_d0_t0"] = DMLDIDCS.predictions["ml_g_d0_t0"][:, :, 0] - ext_predictions["d"]["ml_g_d0_t1"] = DMLDIDCS.predictions["ml_g_d0_t1"][:, :, 0] - ext_predictions["d"]["ml_g_d1_t0"] = DMLDIDCS.predictions["ml_g_d1_t0"][:, :, 0] - ext_predictions["d"]["ml_g_d1_t1"] = DMLDIDCS.predictions["ml_g_d1_t1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLDIDCS.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_g_d0_t0"] = dml_did_cs.predictions["ml_g_d0_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d0_t1"] = dml_did_cs.predictions["ml_g_d0_t1"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t0"] = dml_did_cs.predictions["ml_g_d1_t0"][:, :, 0] + ext_predictions["d"]["ml_g_d1_t1"] = dml_did_cs.predictions["ml_g_d1_t1"][:, :, 0] + 
ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] - DMLDIDCS_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) - DMLDIDCS_ext.set_sample_splitting(all_smpls) + dml_did_cs_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + dml_did_cs_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLDIDCS_ext.fit(external_predictions=ext_predictions) + dml_did_cs_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLDIDCS.coef, "coef_ext": DMLDIDCS_ext.coef} + res_dict = {"coef_normal": dml_did_cs.coef, "coef_ext": dml_did_cs_ext.coef} return res_dict diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py index cce62032..fc9d3665 100644 --- a/doubleml/tests/test_iivm_external_predictions.py +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -36,7 +36,7 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): "dml_procedure": dml_procedure, } - DMLIIVM = DoubleMLIIVM( + dml_iivm = DoubleMLIIVM( ml_g=LinearRegression(), ml_m=LogisticRegression(), ml_r=LogisticRegression(), @@ -44,22 +44,22 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): ) np.random.seed(3141) - DMLIIVM.fit(store_predictions=True) + dml_iivm.fit(store_predictions=True) - ext_predictions["d"]["ml_g0"] = DMLIIVM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIIVM.predictions["ml_g1"][:, :, 0] - ext_predictions["d"]["ml_m"] = DMLIIVM.predictions["ml_m"][:, :, 0] - ext_predictions["d"]["ml_r0"] = DMLIIVM.predictions["ml_r0"][:, :, 0] - ext_predictions["d"]["ml_r1"] = DMLIIVM.predictions["ml_r1"][:, :, 0] + ext_predictions["d"]["ml_g0"] = dml_iivm.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_iivm.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_iivm.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_r0"] = dml_iivm.predictions["ml_r0"][:, :, 0] + ext_predictions["d"]["ml_r1"] 
= dml_iivm.predictions["ml_r1"][:, :, 0] - DMLIIVM_ext = DoubleMLIIVM( + dml_iivm_ext = DoubleMLIIVM( ml_g=dummy_regressor(), ml_m=dummy_classifier(), ml_r=dummy_classifier(), **kwargs ) np.random.seed(3141) - DMLIIVM_ext.fit(external_predictions=ext_predictions) + dml_iivm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLIIVM.coef, "coef_ext": DMLIIVM_ext.coef} + res_dict = {"coef_normal": dml_iivm.coef, "coef_ext": dml_iivm_ext.coef} return res_dict diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index d291af29..533e6250 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -44,30 +44,30 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g kwargs = {"obj_dml_data": dml_data, "score": irm_score, "n_rep": n_rep, "dml_procedure": dml_procedure} - DMLIRM = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) + dml_irm = DoubleMLIRM(ml_g=LinearRegression(), ml_m=LogisticRegression(), **kwargs) np.random.seed(3141) - DMLIRM.fit(store_predictions=True) + dml_irm.fit(store_predictions=True) if set_ml_m_ext: - ext_predictions["d"]["ml_m"] = DMLIRM.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_irm.predictions["ml_m"][:, :, 0] ml_m = dummy_classifier() else: ml_m = LogisticRegression(random_state=42) if set_ml_g_ext: - ext_predictions["d"]["ml_g0"] = DMLIRM.predictions["ml_g0"][:, :, 0] - ext_predictions["d"]["ml_g1"] = DMLIRM.predictions["ml_g1"][:, :, 0] + ext_predictions["d"]["ml_g0"] = dml_irm.predictions["ml_g0"][:, :, 0] + ext_predictions["d"]["ml_g1"] = dml_irm.predictions["ml_g1"][:, :, 0] ml_g = dummy_regressor() else: ml_g = LinearRegression() - DMLIRM_ext = DoubleMLIRM(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_irm_ext = DoubleMLIRM(ml_g=ml_g, ml_m=ml_m, **kwargs) np.random.seed(3141) - DMLIRM_ext.fit(external_predictions=ext_predictions) + 
dml_irm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLIRM.coef[0], "coef_ext": DMLIRM_ext.coef[0]} + res_dict = {"coef_normal": dml_irm.coef[0], "coef_ext": dml_irm_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index a5a9a5bb..be4b9f73 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -44,25 +44,25 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ml_g = LogisticRegression() ml_m = LogisticRegression() - DMLLPQ = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) - DMLLPQ.set_sample_splitting(all_smpls) + dml_lpq = DoubleMLLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_lpq.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLLPQ.fit(store_predictions=True) + dml_lpq.fit(store_predictions=True) - ext_predictions["d"]["ml_m_z"] = DMLLPQ.predictions["ml_m_z"][:, :, 0] - ext_predictions["d"]["ml_m_d_z0"] = DMLLPQ.predictions["ml_m_d_z0"][:, :, 0] - ext_predictions["d"]["ml_m_d_z1"] = DMLLPQ.predictions["ml_m_d_z1"][:, :, 0] - ext_predictions["d"]["ml_g_du_z0"] = DMLLPQ.predictions["ml_g_du_z0"][:, :, 0] - ext_predictions["d"]["ml_g_du_z1"] = DMLLPQ.predictions["ml_g_du_z1"][:, :, 0] + ext_predictions["d"]["ml_m_z"] = dml_lpq.predictions["ml_m_z"][:, :, 0] + ext_predictions["d"]["ml_m_d_z0"] = dml_lpq.predictions["ml_m_d_z0"][:, :, 0] + ext_predictions["d"]["ml_m_d_z1"] = dml_lpq.predictions["ml_m_d_z1"][:, :, 0] + ext_predictions["d"]["ml_g_du_z0"] = dml_lpq.predictions["ml_g_du_z0"][:, :, 0] + ext_predictions["d"]["ml_g_du_z1"] = dml_lpq.predictions["ml_g_du_z1"][:, :, 0] - DMLLPLQ_ext = DoubleMLLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) - DMLLPLQ_ext.set_sample_splitting(all_smpls) + dml_lpq_ext = DoubleMLLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + dml_lpq_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - 
DMLLPLQ_ext.fit(external_predictions=ext_predictions) + dml_lpq_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLLPQ.coef, "coef_ext": DMLLPLQ_ext.coef} + res_dict = {"coef_normal": dml_lpq.coef, "coef_ext": dml_lpq_ext.coef} return res_dict diff --git a/doubleml/tests/test_pliv_external_predictions.py b/doubleml/tests/test_pliv_external_predictions.py index b9061498..562ebe7d 100644 --- a/doubleml/tests/test_pliv_external_predictions.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -55,7 +55,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): if score == "IV-type": kwargs["ml_g"] = LinearRegression() - DMLPLIV = DoubleMLPLIV( + dml_pliv = DoubleMLPLIV( ml_m=LinearRegression(), ml_l=LinearRegression(), ml_r=LinearRegression(), @@ -63,29 +63,29 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): ) np.random.seed(3141) - DMLPLIV.fit(store_predictions=True) + dml_pliv.fit(store_predictions=True) - ext_predictions["d"]["ml_l"] = DMLPLIV.predictions["ml_l"][:, :, 0] - ext_predictions["d"]["ml_r"] = DMLPLIV.predictions["ml_r"][:, :, 0] + ext_predictions["d"]["ml_l"] = dml_pliv.predictions["ml_l"][:, :, 0] + ext_predictions["d"]["ml_r"] = dml_pliv.predictions["ml_r"][:, :, 0] if dim_z == 1: - ext_predictions["d"]["ml_m"] = DMLPLIV.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_pliv.predictions["ml_m"][:, :, 0] if score == "IV-type": kwargs["ml_g"] = dummy_regressor() - ext_predictions["d"]["ml_g"] = DMLPLIV.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_g"] = dml_pliv.predictions["ml_g"][:, :, 0] else: for instr in range(dim_z): ml_m_key = "ml_m_" + "Z" + str(instr + 1) - ext_predictions["d"][ml_m_key] = DMLPLIV.predictions[ml_m_key][:, :, 0] + ext_predictions["d"][ml_m_key] = dml_pliv.predictions[ml_m_key][:, :, 0] - DMLPLIV_ext = DoubleMLPLIV( + dml_pliv_ext = DoubleMLPLIV( ml_m=dummy_regressor(), ml_l=dummy_regressor(), ml_r=dummy_regressor(), **kwargs ) 
np.random.seed(3141) - DMLPLIV_ext.fit(external_predictions=ext_predictions) + dml_pliv_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": DMLPLIV.coef, "coef_ext": DMLPLIV_ext.coef} + res_dict = {"coef_normal": dml_pliv.coef, "coef_ext": dml_pliv_ext.coef} return res_dict diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index f1386e11..3946ca05 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -52,37 +52,37 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l if plr_score == "IV-type": kwargs["ml_g"] = LinearRegression() - DMLPLR = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) + dml_plr = DoubleMLPLR(ml_m=LinearRegression(), ml_l=LinearRegression(), **kwargs) np.random.seed(3141) - DMLPLR.fit(store_predictions=True) + dml_plr.fit(store_predictions=True) if set_ml_m_ext: - ext_predictions["d"]["ml_m"] = DMLPLR.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_plr.predictions["ml_m"][:, :, 0] ml_m = dummy_regressor() else: ml_m = LinearRegression() if set_ml_l_ext: - ext_predictions["d"]["ml_l"] = DMLPLR.predictions["ml_l"][:, :, 0] + ext_predictions["d"]["ml_l"] = dml_plr.predictions["ml_l"][:, :, 0] ml_l = dummy_regressor() else: ml_l = LinearRegression() if plr_score == "IV-type" and set_ml_g_ext: - ext_predictions["d"]["ml_g"] = DMLPLR.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_g"] = dml_plr.predictions["ml_g"][:, :, 0] kwargs["ml_g"] = dummy_regressor() elif plr_score == "IV-type" and not set_ml_g_ext: kwargs["ml_g"] = LinearRegression() else: pass - DMLPLR_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) + dml_plr_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) np.random.seed(3141) - DMLPLR_ext.fit(external_predictions=ext_predictions) + dml_plr_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": 
DMLPLR.coef, "coef_ext": DMLPLR_ext.coef} + res_dict = {"coef_normal": dml_plr.coef, "coef_ext": dml_plr_ext.coef} return res_dict diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 0f3c0bc7..a2962ab0 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -54,28 +54,28 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m ml_m = LogisticRegression(random_state=42) ml_g = LogisticRegression(random_state=42) - DMLPQ = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) - DMLPQ.set_sample_splitting(all_smpls) + dml_pq = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_pq.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLPQ.fit(store_predictions=True) + dml_pq.fit(store_predictions=True) if set_ml_m_ext: - ext_predictions["d"]["ml_m"] = DMLPQ.predictions["ml_m"][:, :, 0] + ext_predictions["d"]["ml_m"] = dml_pq.predictions["ml_m"][:, :, 0] ml_m = dummy_classifier() else: ml_m = LogisticRegression(random_state=42) if set_ml_g_ext: - ext_predictions["d"]["ml_g"] = DMLPQ.predictions["ml_g"][:, :, 0] + ext_predictions["d"]["ml_g"] = dml_pq.predictions["ml_g"][:, :, 0] ml_g = dummy_classifier() else: ml_g = LogisticRegression(random_state=42) - DMLPLQ_ext = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) - DMLPLQ_ext.set_sample_splitting(all_smpls) + dml_pq_ext = DoubleMLPQ(ml_g=ml_g, ml_m=ml_m, **kwargs) + dml_pq_ext.set_sample_splitting(all_smpls) np.random.seed(3141) - DMLPLQ_ext.fit(external_predictions=ext_predictions) + dml_pq_ext.fit(external_predictions=ext_predictions) if set_ml_m_ext and not set_ml_g_ext: # adjust tolerance for the case that ml_m is set to external predictions @@ -86,7 +86,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m tol_rel = 1e-9 tol_abs = 1e-4 - res_dict = {"coef_normal": DMLPQ.coef, "coef_ext": DMLPLQ_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} + 
res_dict = {"coef_normal": dml_pq.coef, "coef_ext": dml_pq_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} return res_dict From ee0403744522ead8ba8ccc6b35d567403652ae20 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 11 Dec 2023 08:06:00 +0100 Subject: [PATCH 106/134] reduce test warnings --- doubleml/tests/test_did_external_predictions.py | 2 +- doubleml/tests/test_didcs_external_predictions.py | 2 +- doubleml/tests/test_iivm_external_predictions.py | 2 +- doubleml/tests/test_lpq_external_predictions.py | 2 +- doubleml/tests/test_pliv_external_predictions.py | 2 +- doubleml/tests/test_plr_external_predictions.py | 2 +- doubleml/tests/test_pq_external_predictions.py | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index 59a1e6f1..43138cf8 100644 --- a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -49,7 +49,7 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): np.random.seed(3141) dml_did_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_did.coef, "coef_ext": dml_did_ext.coef} + res_dict = {"coef_normal": dml_did.coef[0], "coef_ext": dml_did_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index 90aafaef..a1ffda5e 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -52,7 +52,7 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): np.random.seed(3141) dml_did_cs_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_did_cs.coef, "coef_ext": dml_did_cs_ext.coef} + res_dict = {"coef_normal": dml_did_cs.coef[0], "coef_ext": dml_did_cs_ext.coef[0]} return res_dict diff --git 
a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py index fc9d3665..548f2297 100644 --- a/doubleml/tests/test_iivm_external_predictions.py +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -59,7 +59,7 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): np.random.seed(3141) dml_iivm_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_iivm.coef, "coef_ext": dml_iivm_ext.coef} + res_dict = {"coef_normal": dml_iivm.coef[0], "coef_ext": dml_iivm_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index be4b9f73..af46191f 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -62,7 +62,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): np.random.seed(3141) dml_lpq_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_lpq.coef, "coef_ext": dml_lpq_ext.coef} + res_dict = {"coef_normal": dml_lpq.coef[0], "coef_ext": dml_lpq_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_pliv_external_predictions.py b/doubleml/tests/test_pliv_external_predictions.py index 562ebe7d..5a5eb097 100644 --- a/doubleml/tests/test_pliv_external_predictions.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -85,7 +85,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): np.random.seed(3141) dml_pliv_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_pliv.coef, "coef_ext": dml_pliv_ext.coef} + res_dict = {"coef_normal": dml_pliv.coef[0], "coef_ext": dml_pliv_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index 3946ca05..9d776710 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -82,7 +82,7 @@ def 
doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l np.random.seed(3141) dml_plr_ext.fit(external_predictions=ext_predictions) - res_dict = {"coef_normal": dml_plr.coef, "coef_ext": dml_plr_ext.coef} + res_dict = {"coef_normal": dml_plr.coef[0], "coef_ext": dml_plr_ext.coef[0]} return res_dict diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index a2962ab0..4f23800f 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -86,7 +86,7 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m tol_rel = 1e-9 tol_abs = 1e-4 - res_dict = {"coef_normal": dml_pq.coef, "coef_ext": dml_pq_ext.coef, "tol_rel": tol_rel, "tol_abs": tol_abs} + res_dict = {"coef_normal": dml_pq.coef[0], "coef_ext": dml_pq_ext.coef[0], "tol_rel": tol_rel, "tol_abs": tol_abs} return res_dict From c807994bba8d3d5758b32c4872ef94b224d668d8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 15 Dec 2023 09:14:05 +0100 Subject: [PATCH 107/134] Rename dummy learners --- .../tests/test_did_external_predictions.py | 4 +-- .../tests/test_didcs_external_predictions.py | 4 +-- .../test_doubleml_exceptions_ext_preds.py | 6 ++--- doubleml/tests/test_dummy_learners.py | 26 +++++++++---------- .../tests/test_iivm_external_predictions.py | 4 +-- .../tests/test_irm_external_predictions.py | 6 ++--- .../tests/test_lpq_external_predictions.py | 4 +-- .../tests/test_pliv_external_predictions.py | 6 ++--- .../tests/test_plr_external_predictions.py | 8 +++--- .../tests/test_pq_external_predictions.py | 6 ++--- doubleml/utils/__init__.py | 8 +++--- doubleml/utils/dummy_learners.py | 18 ++++++------- 12 files changed, 50 insertions(+), 50 deletions(-) diff --git a/doubleml/tests/test_did_external_predictions.py b/doubleml/tests/test_did_external_predictions.py index 43138cf8..89f437d3 100644 --- 
a/doubleml/tests/test_did_external_predictions.py +++ b/doubleml/tests/test_did_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLDID from doubleml.datasets import make_did_SZ2020 -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier from ._utils import draw_smpls @@ -44,7 +44,7 @@ def doubleml_did_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_g1"] = dml_did.predictions["ml_g1"][:, :, 0] ext_predictions["d"]["ml_m"] = dml_did.predictions["ml_m"][:, :, 0] - dml_did_ext = DoubleMLDID(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + dml_did_ext = DoubleMLDID(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) dml_did_ext.set_sample_splitting(all_smpls) np.random.seed(3141) dml_did_ext.fit(external_predictions=ext_predictions) diff --git a/doubleml/tests/test_didcs_external_predictions.py b/doubleml/tests/test_didcs_external_predictions.py index a1ffda5e..631143ab 100644 --- a/doubleml/tests/test_didcs_external_predictions.py +++ b/doubleml/tests/test_didcs_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLDIDCS from doubleml.datasets import make_did_SZ2020 -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier from ._utils import draw_smpls @@ -47,7 +47,7 @@ def doubleml_didcs_fixture(did_score, dml_procedure, n_rep): ext_predictions["d"]["ml_g_d1_t1"] = dml_did_cs.predictions["ml_g_d1_t1"][:, :, 0] ext_predictions["d"]["ml_m"] = dml_did_cs.predictions["ml_m"][:, :, 0] - dml_did_cs_ext = DoubleMLDIDCS(ml_g=dummy_regressor(), ml_m=dummy_classifier(), **kwargs) + dml_did_cs_ext = DoubleMLDIDCS(ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), **kwargs) dml_did_cs_ext.set_sample_splitting(all_smpls) 
np.random.seed(3141) dml_did_cs_ext.fit(external_predictions=ext_predictions) diff --git a/doubleml/tests/test_doubleml_exceptions_ext_preds.py b/doubleml/tests/test_doubleml_exceptions_ext_preds.py index 4be4430f..395d8bf5 100644 --- a/doubleml/tests/test_doubleml_exceptions_ext_preds.py +++ b/doubleml/tests/test_doubleml_exceptions_ext_preds.py @@ -1,7 +1,7 @@ import pytest from doubleml import DoubleMLCVAR, DoubleMLQTE, DoubleMLData from doubleml.datasets import make_irm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier df_irm = make_irm_data(n_obs=10, dim_x=2, theta=0.5, return_type="DataFrame") ext_predictions = {"d": {}} @@ -11,7 +11,7 @@ def test_cvar_external_prediction_exception(): msg = "External predictions not implemented for DoubleMLCVAR." with pytest.raises(NotImplementedError, match=msg): - cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), dummy_regressor(), dummy_classifier(), treatment=1) + cvar = DoubleMLCVAR(DoubleMLData(df_irm, "y", "d"), DMLDummyRegressor(), DMLDummyClassifier(), treatment=1) cvar.fit(external_predictions=ext_predictions) @@ -19,5 +19,5 @@ def test_cvar_external_prediction_exception(): def test_qte_external_prediction_exception(): msg = "External predictions not implemented for DoubleMLQTE." 
with pytest.raises(NotImplementedError, match=msg): - qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), dummy_classifier(), dummy_classifier()) + qte = DoubleMLQTE(DoubleMLData(df_irm, "y", "d"), DMLDummyClassifier(), DMLDummyClassifier()) qte.fit(external_predictions=ext_predictions) diff --git a/doubleml/tests/test_dummy_learners.py b/doubleml/tests/test_dummy_learners.py index a357345c..c23088fa 100644 --- a/doubleml/tests/test_dummy_learners.py +++ b/doubleml/tests/test_dummy_learners.py @@ -1,14 +1,14 @@ import pytest import numpy as np -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier from sklearn.base import clone @pytest.fixture(scope="module") def dl_fixture(): fixture = { - "dummy_regressor": dummy_regressor(), - "dummy_classifier": dummy_classifier(), + "DMLDummyRegressor": DMLDummyRegressor(), + "DMLDummyClassifier": DMLDummyClassifier(), "X": np.random.normal(0, 1, size=(100, 10)), "y_con": np.random.normal(0, 1, size=(100, 1)), "y_cat": np.random.binomial(1, 0.5, size=(100, 1)), @@ -19,28 +19,28 @@ def dl_fixture(): @pytest.mark.ci def test_fit(dl_fixture): - msg = "Accessed fit method of dummy_regressor!" + msg = "Accessed fit method of DMLDummyRegressor!" with pytest.raises(AttributeError, match=msg): - dl_fixture["dummy_regressor"].fit(dl_fixture["X"], dl_fixture["y_con"]) - msg = "Accessed fit method of dummy_classifier!" + dl_fixture["DMLDummyRegressor"].fit(dl_fixture["X"], dl_fixture["y_con"]) + msg = "Accessed fit method of DMLDummyClassifier!" with pytest.raises(AttributeError, match=msg): - dl_fixture["dummy_classifier"].fit(dl_fixture["X"], dl_fixture["y_cat"]) + dl_fixture["DMLDummyClassifier"].fit(dl_fixture["X"], dl_fixture["y_cat"]) @pytest.mark.ci def test_predict(dl_fixture): - msg = "Accessed predict method of dummy_regressor!" + msg = "Accessed predict method of DMLDummyRegressor!" 
with pytest.raises(AttributeError, match=msg): - dl_fixture["dummy_regressor"].predict(dl_fixture["X"]) - msg = "Accessed predict method of dummy_classifier!" + dl_fixture["DMLDummyRegressor"].predict(dl_fixture["X"]) + msg = "Accessed predict method of DMLDummyClassifier!" with pytest.raises(AttributeError, match=msg): - dl_fixture["dummy_classifier"].predict(dl_fixture["X"]) + dl_fixture["DMLDummyClassifier"].predict(dl_fixture["X"]) @pytest.mark.ci def test_clone(dl_fixture): try: - _ = clone(dl_fixture["dummy_regressor"]) - _ = clone(dl_fixture["dummy_classifier"]) + _ = clone(dl_fixture["DMLDummyRegressor"]) + _ = clone(dl_fixture["DMLDummyClassifier"]) except Exception as e: pytest.fail(f"clone() raised an exception:\n{str(e)}\n") diff --git a/doubleml/tests/test_iivm_external_predictions.py b/doubleml/tests/test_iivm_external_predictions.py index 548f2297..96d59018 100644 --- a/doubleml/tests/test_iivm_external_predictions.py +++ b/doubleml/tests/test_iivm_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLIIVM, DoubleMLData from doubleml.datasets import make_iivm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier @pytest.fixture(scope="module", params=["dml1", "dml2"]) @@ -53,7 +53,7 @@ def adapted_doubleml_fixture(dml_procedure, n_rep): ext_predictions["d"]["ml_r1"] = dml_iivm.predictions["ml_r1"][:, :, 0] dml_iivm_ext = DoubleMLIIVM( - ml_g=dummy_regressor(), ml_m=dummy_classifier(), ml_r=dummy_classifier(), **kwargs + ml_g=DMLDummyRegressor(), ml_m=DMLDummyClassifier(), ml_r=DMLDummyClassifier(), **kwargs ) np.random.seed(3141) diff --git a/doubleml/tests/test_irm_external_predictions.py b/doubleml/tests/test_irm_external_predictions.py index 533e6250..7e89a320 100644 --- a/doubleml/tests/test_irm_external_predictions.py +++ b/doubleml/tests/test_irm_external_predictions.py @@ -4,7 +4,7 
@@ from sklearn.linear_model import LinearRegression, LogisticRegression from doubleml import DoubleMLIRM, DoubleMLData from doubleml.datasets import make_irm_data -from doubleml.utils import dummy_regressor, dummy_classifier +from doubleml.utils import DMLDummyRegressor, DMLDummyClassifier @pytest.fixture(scope="module", params=["ATE", "ATTE"]) @@ -51,14 +51,14 @@ def doubleml_irm_fixture(irm_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_g if set_ml_m_ext: ext_predictions["d"]["ml_m"] = dml_irm.predictions["ml_m"][:, :, 0] - ml_m = dummy_classifier() + ml_m = DMLDummyClassifier() else: ml_m = LogisticRegression(random_state=42) if set_ml_g_ext: ext_predictions["d"]["ml_g0"] = dml_irm.predictions["ml_g0"][:, :, 0] ext_predictions["d"]["ml_g1"] = dml_irm.predictions["ml_g1"][:, :, 0] - ml_g = dummy_regressor() + ml_g = DMLDummyRegressor() else: ml_g = LinearRegression() diff --git a/doubleml/tests/test_lpq_external_predictions.py b/doubleml/tests/test_lpq_external_predictions.py index af46191f..fbe0e742 100644 --- a/doubleml/tests/test_lpq_external_predictions.py +++ b/doubleml/tests/test_lpq_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLLPQ, DoubleMLData from doubleml.datasets import make_iivm_data -from doubleml.utils import dummy_classifier +from doubleml.utils import DMLDummyClassifier from ._utils import draw_smpls @@ -56,7 +56,7 @@ def doubleml_lpq_fixture(dml_procedure, n_rep, normalize_ipw): ext_predictions["d"]["ml_g_du_z0"] = dml_lpq.predictions["ml_g_du_z0"][:, :, 0] ext_predictions["d"]["ml_g_du_z1"] = dml_lpq.predictions["ml_g_du_z1"][:, :, 0] - dml_lpq_ext = DoubleMLLPQ(ml_g=dummy_classifier(), ml_m=dummy_classifier(), **kwargs) + dml_lpq_ext = DoubleMLLPQ(ml_g=DMLDummyClassifier(), ml_m=DMLDummyClassifier(), **kwargs) dml_lpq_ext.set_sample_splitting(all_smpls) np.random.seed(3141) diff --git a/doubleml/tests/test_pliv_external_predictions.py 
b/doubleml/tests/test_pliv_external_predictions.py index 5a5eb097..89d63b8f 100644 --- a/doubleml/tests/test_pliv_external_predictions.py +++ b/doubleml/tests/test_pliv_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LinearRegression from doubleml import DoubleMLPLIV, DoubleMLData from doubleml.datasets import make_pliv_CHS2015 -from doubleml.utils import dummy_regressor +from doubleml.utils import DMLDummyRegressor @pytest.fixture(scope="module", params=["partialling out", "IV-type"]) @@ -71,7 +71,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): if dim_z == 1: ext_predictions["d"]["ml_m"] = dml_pliv.predictions["ml_m"][:, :, 0] if score == "IV-type": - kwargs["ml_g"] = dummy_regressor() + kwargs["ml_g"] = DMLDummyRegressor() ext_predictions["d"]["ml_g"] = dml_pliv.predictions["ml_g"][:, :, 0] else: for instr in range(dim_z): @@ -79,7 +79,7 @@ def adapted_doubleml_fixture(score, dml_procedure, n_rep, dim_z): ext_predictions["d"][ml_m_key] = dml_pliv.predictions[ml_m_key][:, :, 0] dml_pliv_ext = DoubleMLPLIV( - ml_m=dummy_regressor(), ml_l=dummy_regressor(), ml_r=dummy_regressor(), **kwargs + ml_m=DMLDummyRegressor(), ml_l=DMLDummyRegressor(), ml_r=DMLDummyRegressor(), **kwargs ) np.random.seed(3141) diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index 9d776710..9844b574 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LinearRegression from doubleml import DoubleMLPLR, DoubleMLData from doubleml.datasets import make_plr_CCDDHNR2018 -from doubleml.utils import dummy_regressor +from doubleml.utils import DMLDummyRegressor @pytest.fixture(scope="module", params=["IV-type", "partialling out"]) @@ -59,19 +59,19 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l if set_ml_m_ext: ext_predictions["d"]["ml_m"] = 
dml_plr.predictions["ml_m"][:, :, 0] - ml_m = dummy_regressor() + ml_m = DMLDummyRegressor() else: ml_m = LinearRegression() if set_ml_l_ext: ext_predictions["d"]["ml_l"] = dml_plr.predictions["ml_l"][:, :, 0] - ml_l = dummy_regressor() + ml_l = DMLDummyRegressor() else: ml_l = LinearRegression() if plr_score == "IV-type" and set_ml_g_ext: ext_predictions["d"]["ml_g"] = dml_plr.predictions["ml_g"][:, :, 0] - kwargs["ml_g"] = dummy_regressor() + kwargs["ml_g"] = DMLDummyRegressor() elif plr_score == "IV-type" and not set_ml_g_ext: kwargs["ml_g"] = LinearRegression() else: diff --git a/doubleml/tests/test_pq_external_predictions.py b/doubleml/tests/test_pq_external_predictions.py index 4f23800f..c814378d 100644 --- a/doubleml/tests/test_pq_external_predictions.py +++ b/doubleml/tests/test_pq_external_predictions.py @@ -4,7 +4,7 @@ from sklearn.linear_model import LogisticRegression from doubleml import DoubleMLPQ, DoubleMLData from doubleml.datasets import make_irm_data -from doubleml.utils import dummy_classifier +from doubleml.utils import DMLDummyClassifier from ._utils import draw_smpls @@ -61,13 +61,13 @@ def doubleml_pq_fixture(dml_procedure, n_rep, normalize_ipw, set_ml_m_ext, set_m if set_ml_m_ext: ext_predictions["d"]["ml_m"] = dml_pq.predictions["ml_m"][:, :, 0] - ml_m = dummy_classifier() + ml_m = DMLDummyClassifier() else: ml_m = LogisticRegression(random_state=42) if set_ml_g_ext: ext_predictions["d"]["ml_g"] = dml_pq.predictions["ml_g"][:, :, 0] - ml_g = dummy_classifier() + ml_g = DMLDummyClassifier() else: ml_g = LogisticRegression(random_state=42) diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py index b3fbb9f0..68b9d377 100644 --- a/doubleml/utils/__init__.py +++ b/doubleml/utils/__init__.py @@ -1,7 +1,7 @@ -from .dummy_learners import dummy_classifier -from .dummy_learners import dummy_regressor +from .dummy_learners import DMLDummyRegressor +from .dummy_learners import DMLDummyClassifier __all__ = [ - "dummy_classifier", - 
"dummy_regressor", + "DMLDummyRegressor", + "DMLDummyClassifier", ] diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 4d771b20..62127ff7 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -1,7 +1,7 @@ from sklearn.base import BaseEstimator -class dummy_regressor(BaseEstimator): +class DMLDummyRegressor(BaseEstimator): """ A dummy regressor that raises an AttributeError when attempting to access its fit, predict, or set_params methods. @@ -22,16 +22,16 @@ class dummy_regressor(BaseEstimator): _estimator_type = "regressor" def fit(*args): - raise AttributeError("Accessed fit method of dummy_regressor!") + raise AttributeError("Accessed fit method of DMLDummyRegressor!") def predict(*args): - raise AttributeError("Accessed predict method of dummy_regressor!") + raise AttributeError("Accessed predict method of DMLDummyRegressor!") def set_params(*args): - raise AttributeError("Accessed set_params method of dummy_regressor!") + raise AttributeError("Accessed set_params method of DMLDummyRegressor!") -class dummy_classifier(BaseEstimator): +class DMLDummyClassifier(BaseEstimator): """ A dummy classifier that raises an AttributeError when attempting to access its fit, predict, set_params, or predict_proba methods. 
@@ -54,13 +54,13 @@ class dummy_classifier(BaseEstimator): _estimator_type = "classifier" def fit(*args): - raise AttributeError("Accessed fit method of dummy_classifier!") + raise AttributeError("Accessed fit method of DMLDummyClassifier!") def predict(*args): - raise AttributeError("Accessed predict method of dummy_classifier!") + raise AttributeError("Accessed predict method of DMLDummyClassifier!") def set_params(*args): - raise AttributeError("Accessed set_params method of dummy_classifier!") + raise AttributeError("Accessed set_params method of DMLDummyClassifier!") def predict_proba(*args, **kwargs): - raise AttributeError("Accessed predict_proba method of dummy_classifier!") + raise AttributeError("Accessed predict_proba method of DMLDummyClassifier!") From afd39d5df9e2a07afaf7f9fb9382f23da2d38b87 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 15 Dec 2023 09:35:06 +0100 Subject: [PATCH 108/134] fix PLR external predictions --- doubleml/double_ml_plr.py | 16 ++++++++++------ doubleml/tests/test_plr_external_predictions.py | 4 ++++ 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml_plr.py b/doubleml/double_ml_plr.py index bd4e5d8d..cd81bd1f 100644 --- a/doubleml/double_ml_plr.py +++ b/doubleml/double_ml_plr.py @@ -186,6 +186,10 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa l_hat = {'preds': external_predictions['ml_l'], 'targets': None, 'models': None} + elif self._score == "IV-type" and g_external: + l_hat = {'preds': None, + 'targets': None, + 'models': None} else: l_hat = _dml_cv_predict(self._learner['ml_l'], x, y, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_l'), method=self._predict_method['ml_l'], @@ -217,16 +221,16 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # an estimate of g is obtained for the IV-type score and callable scores g_hat = {'preds': None, 'targets': None, 
'models': None} if 'ml_g' in self._learner: - # get an initial estimate for theta using the partialling out score - psi_a = -np.multiply(d - m_hat['preds'], d - m_hat['preds']) - psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) - theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) # nuisance g if g_external: g_hat = {'preds': external_predictions['ml_g'], 'targets': None, 'models': None} else: + # get an initial estimate for theta using the partialling out score + psi_a = -np.multiply(d - m_hat['preds'], d - m_hat['preds']) + psi_b = np.multiply(d - m_hat['preds'], y - l_hat['preds']) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) g_hat = _dml_cv_predict(self._learner['ml_g'], x, y - theta_initial*d, smpls=smpls, n_jobs=n_jobs_cv, est_params=self._get_params('ml_g'), method=self._predict_method['ml_g'], return_models=return_models) @@ -248,8 +252,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls): - # compute residuals - u_hat = y - l_hat + # compute residual v_hat = d - m_hat if isinstance(self.score, str): @@ -258,6 +261,7 @@ def _score_elements(self, y, d, l_hat, m_hat, g_hat, smpls): psi_b = np.multiply(v_hat, y - g_hat) else: assert self.score == 'partialling out' + u_hat = y - l_hat psi_a = -np.multiply(v_hat, v_hat) psi_b = np.multiply(v_hat, u_hat) else: diff --git a/doubleml/tests/test_plr_external_predictions.py b/doubleml/tests/test_plr_external_predictions.py index 9844b574..58c987f5 100644 --- a/doubleml/tests/test_plr_external_predictions.py +++ b/doubleml/tests/test_plr_external_predictions.py @@ -77,6 +77,10 @@ def doubleml_plr_fixture(plr_score, dml_procedure, n_rep, set_ml_m_ext, set_ml_l else: pass + if plr_score == "IV-type" and set_ml_g_ext and not set_ml_l_ext: + ml_l = DMLDummyRegressor() + + # special case if ml_l is not needed dml_plr_ext = DoubleMLPLR(ml_m=ml_m, ml_l=ml_l, **kwargs) 
np.random.seed(3141) From 2c532f29044d994a724a71920e82e07c7606993b Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 15 Dec 2023 12:25:13 +0100 Subject: [PATCH 109/134] fix lpq targets --- doubleml/double_ml_lpq.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml_lpq.py b/doubleml/double_ml_lpq.py index 2b7260eb..038a3984 100644 --- a/doubleml/double_ml_lpq.py +++ b/doubleml/double_ml_lpq.py @@ -510,7 +510,7 @@ def ipw_score(theta): # the predictions of both should only be evaluated conditional on z == 0 or z == 1 m_d_z0_hat["targets"] = _cond_targets(d, cond_sample=(z == 0)) - m_d_z0_hat["targets"] = _cond_targets(d, cond_sample=(z == 1)) + m_d_z1_hat["targets"] = _cond_targets(d, cond_sample=(z == 1)) if return_models: m_z_hat["models"] = fitted_models["ml_m_z"] From e284fd59c36f4d76504704d51d5cfadf1e4ffe38 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:12:22 +0100 Subject: [PATCH 110/134] add deprication warnings to doubleml and resampling --- doubleml/_utils_resampling.py | 12 ++++++++++++ doubleml/double_ml.py | 24 +++++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/doubleml/_utils_resampling.py b/doubleml/_utils_resampling.py index a2ebdf31..98495e3d 100644 --- a/doubleml/_utils_resampling.py +++ b/doubleml/_utils_resampling.py @@ -4,6 +4,15 @@ from sklearn.model_selection import KFold, RepeatedKFold, RepeatedStratifiedKFold +# Remove warnings in future versions +def deprication_apply_cross_fitting(): + warnings.warn('The apply_cross_fitting argument is deprecated and will be removed in future versions. ' + 'In the future, crossfitting is applied by default. 
' + 'To rely on sample splitting please use external predictions.', + DeprecationWarning) + return + + class DoubleMLResampling: def __init__(self, n_folds, @@ -14,6 +23,9 @@ def __init__(self, self.n_folds = n_folds self.n_rep = n_rep self.n_obs = n_obs + if not apply_cross_fitting: + deprication_apply_cross_fitting() + apply_cross_fitting = True self.apply_cross_fitting = apply_cross_fitting self.stratify = stratify if (self.n_folds == 1) & self.apply_cross_fitting: diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index b4957f62..40efe090 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -24,6 +24,21 @@ _implemented_data_backends = ['DoubleMLData', 'DoubleMLClusterData'] +# Remove warnings in future versions +def deprication_apply_cross_fitting(): + warnings.warn('The apply_cross_fitting argument is deprecated and will be removed in future versions. ' + 'In the future, crossfitting is applied by default. ' + 'To rely on sample splitting please use external predictions.', + DeprecationWarning) + return + + +def deprication_dml_procedure(): + warnings.warn('The dml_procedure argument is deprecated and will be removed in future versions. ' + 'in the future, dml_procedure is always set to dml2.', DeprecationWarning) + return + + class DoubleML(ABC): """Double Machine Learning. """ @@ -89,6 +104,9 @@ def __init__(self, raise TypeError('draw_sample_splitting must be True or False. ' f'Got {str(draw_sample_splitting)}.') + if not apply_cross_fitting: + deprication_apply_cross_fitting() + # set resampling specifications if self._is_cluster_data: if (n_folds == 1) or (not apply_cross_fitting): @@ -103,11 +121,15 @@ def __init__(self, # default is no stratification self._strata = None - # check and set dml_procedure and score if (not isinstance(dml_procedure, str)) | (dml_procedure not in ['dml1', 'dml2']): raise ValueError('dml_procedure must be "dml1" or "dml2". 
' f'Got {str(dml_procedure)}.') self._dml_procedure = dml_procedure + + if dml_procedure == 'dml1': + deprication_dml_procedure() + self._dml_procedure = dml_procedure + self._score = score if (self.n_folds == 1) & self.apply_cross_fitting: From f8d080a7fa61ff1018588febd7dc86ba2e803c60 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 21 Dec 2023 14:13:00 +0100 Subject: [PATCH 111/134] Update _utils_resampling.py --- doubleml/_utils_resampling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/doubleml/_utils_resampling.py b/doubleml/_utils_resampling.py index 98495e3d..93546b06 100644 --- a/doubleml/_utils_resampling.py +++ b/doubleml/_utils_resampling.py @@ -25,7 +25,6 @@ def __init__(self, self.n_obs = n_obs if not apply_cross_fitting: deprication_apply_cross_fitting() - apply_cross_fitting = True self.apply_cross_fitting = apply_cross_fitting self.stratify = stratify if (self.n_folds == 1) & self.apply_cross_fitting: From 0b59332125783f2606b00faba5ea845487a0dcdd Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:13:44 +0100 Subject: [PATCH 112/134] add shapes to attribute descritpion of predictions and scores --- doubleml/double_ml.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 40efe090..f1138148 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -272,7 +272,7 @@ def params_names(self): @property def predictions(self): """ - The predictions of the nuisance models. + The predictions of the nuisance models with shape ``(n_obs, n_rep, n_coefs)``. """ return self._predictions @@ -354,6 +354,7 @@ def psi(self): Values of the score function after calling :meth:`fit`; For models (e.g., PLR, IRM, PLIV, IIVM) with linear score (in the parameter) :math:`\\psi(W; \\theta, \\eta) = \\psi_a(W; \\eta) \\theta + \\psi_b(W; \\eta)`. 
+ The shape is ``(n_obs, n_rep, n_coefs)``. """ return self._psi @@ -364,6 +365,7 @@ def psi_deriv(self): after calling :meth:`fit`; For models (e.g., PLR, IRM, PLIV, IIVM) with linear score (in the parameter) :math:`\\psi_a(W; \\eta)`. + The shape is ``(n_obs, n_rep, n_coefs)``. """ return self._psi_deriv From 23f8cb4630e778dc40ddf3bc07b5b691346d2de2 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 30 Jan 2024 10:57:08 +0100 Subject: [PATCH 113/134] update utils_dml_cv_predict for different python versions _fit_and_predit from sklearn did remove the verbose argument --- doubleml/tests/_utils_dml_cv_predict.py | 38 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/doubleml/tests/_utils_dml_cv_predict.py b/doubleml/tests/_utils_dml_cv_predict.py index 8ec1d95d..2225cfe3 100644 --- a/doubleml/tests/_utils_dml_cv_predict.py +++ b/doubleml/tests/_utils_dml_cv_predict.py @@ -8,6 +8,21 @@ from sklearn.preprocessing import LabelEncoder from sklearn.model_selection._validation import _fit_and_predict, _check_is_permutation +# Adapt _fit_and_predict for earlier sklearn versions +from distutils.version import LooseVersion +from sklearn import __version__ as sklearn_version + +if LooseVersion(sklearn_version) < LooseVersion("1.4.0"): + def _fit_and_predict_adapted(estimator, x, y, train, test, fit_params, method): + res = _fit_and_predict(estimator, x, y, train, test, + verbose=0, + fit_params=fit_params, + method=method) + return res +else: + def _fit_and_predict_adapted(estimator, x, y, train, test, fit_params, method): + return _fit_and_predict(estimator, x, y, train, test, fit_params, method) + def _dml_cv_predict_ut_version(estimator, x, y, smpls=None, n_jobs=None, est_params=None, method='predict'): @@ -22,18 +37,19 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None, train_index, test_index = smpls[0] # set some defaults aligned with cross_val_predict fit_params = 
None - verbose = 0 if method == 'predict_proba': predictions = np.full((len(y), 2), np.nan) else: predictions = np.full(len(y), np.nan) if est_params is None: - xx = _fit_and_predict(clone(estimator), - x, y, train_index, test_index, verbose, fit_params, method) + xx = _fit_and_predict_adapted( + clone(estimator), + x, y, train_index, test_index, fit_params, method) else: assert isinstance(est_params, dict) - xx = _fit_and_predict(clone(estimator).set_params(**est_params), - x, y, train_index, test_index, verbose, fit_params, method) + xx = _fit_and_predict_adapted( + clone(estimator).set_params(**est_params), + x, y, train_index, test_index, fit_params, method) # implementation is (also at other parts) restricted to a sorted set of test_indices, but this could be fixed # inv_test_indices = np.argsort(test_indices) @@ -61,22 +77,22 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None, pre_dispatch=pre_dispatch) # FixMe: Find a better way to handle the different combinations of paramters and smpls_is_partition if est_params is None: - prediction_blocks = parallel(delayed(_fit_and_predict)( + prediction_blocks = parallel(delayed(_fit_and_predict_adapted)( estimator, - x, y, train_index, test_index, verbose, fit_params, method) + x, y, train_index, test_index, fit_params, method) for idx, (train_index, test_index) in enumerate(smpls)) elif isinstance(est_params, dict): # if no fold-specific parameters we redirect to the standard method # warnings.warn("Using the same (hyper-)parameters for all folds") - prediction_blocks = parallel(delayed(_fit_and_predict)( + prediction_blocks = parallel(delayed(_fit_and_predict_adapted)( clone(estimator).set_params(**est_params), - x, y, train_index, test_index, verbose, fit_params, method) + x, y, train_index, test_index, fit_params, method) for idx, (train_index, test_index) in enumerate(smpls)) else: assert len(est_params) == len(smpls), 'provide one parameter setting per fold' - prediction_blocks = 
parallel(delayed(_fit_and_predict)( + prediction_blocks = parallel(delayed(_fit_and_predict_adapted)( clone(estimator).set_params(**est_params[idx]), - x, y, train_index, test_index, verbose, fit_params, method) + x, y, train_index, test_index, fit_params, method) for idx, (train_index, test_index) in enumerate(smpls)) # Concatenate the predictions From fed64c72fbca52f5c0801716867d9e03f45f5dd1 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 30 Jan 2024 13:46:28 +0100 Subject: [PATCH 114/134] fix penalty logistic regression --- doubleml/tests/test_irm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/tests/test_irm.py b/doubleml/tests/test_irm.py index 672feb4c..e56eea65 100644 --- a/doubleml/tests/test_irm.py +++ b/doubleml/tests/test_irm.py @@ -278,7 +278,7 @@ def dml_irm_weights_fixture(n_rep, dml_procedure): # First stage estimation ml_g = LinearRegression() - ml_m = LogisticRegression(penalty='none', random_state=42) + ml_m = LogisticRegression(penalty='l2', random_state=42) # ATE with and without weights dml_irm_obj_ate_no_weights = dml.DoubleMLIRM( From 86ff6ddb7f3c1caefcb58c109b9aed47a3d7f800 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 30 Jan 2024 15:04:57 +0100 Subject: [PATCH 115/134] add basic version of gain_statistics to utils --- doubleml/double_ml.py | 43 +--------------- doubleml/utils/gain_statistics.py | 85 +++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 41 deletions(-) create mode 100644 doubleml/utils/gain_statistics.py diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index f1138148..f33e4c48 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -19,7 +19,7 @@ from ._utils_checks import _check_in_zero_one, _check_integer, _check_float, _check_bool, _check_is_partition, \ _check_all_smpls, _check_smpl_split, _check_smpl_split_tpl, _check_benchmarks from 
._utils_plots import _sensitivity_contour_plot - +from .utils.gain_statistics import gain_statistics _implemented_data_backends = ['DoubleMLData', 'DoubleMLClusterData'] @@ -1997,45 +1997,6 @@ def sensitivity_benchmark(self, benchmarking_set): dml_short._dml_data.x_cols = x_list_short dml_short.fit() - # save elements for readability - var_y = np.var(self._dml_data.y) - var_y_residuals_long = np.squeeze(self.sensitivity_elements['sigma2'], axis=0) - nu2_long = np.squeeze(self.sensitivity_elements['nu2'], axis=0) - var_y_residuals_short = np.squeeze(dml_short.sensitivity_elements['sigma2'], axis=0) - nu2_short = np.squeeze(dml_short.sensitivity_elements['nu2'], axis=0) - - # compute nonparametric R2 - R2_y_long = 1.0 - np.divide(var_y_residuals_long, var_y) - R2_y_short = 1.0 - np.divide(var_y_residuals_short, var_y) - R2_riesz = np.divide(nu2_short, nu2_long) - - # Gain statistics - all_cf_y_benchmark = np.clip(np.divide((R2_y_long - R2_y_short), (1.0 - R2_y_long)), 0, 1) - all_cf_d_benchmark = np.clip(np.divide((1.0 - R2_riesz), R2_riesz), 0, 1) - cf_y_benchmark = np.median(all_cf_y_benchmark, axis=0) - cf_d_benchmark = np.median(all_cf_d_benchmark, axis=0) - - # change in estimates (slightly different to paper) - all_delta_theta = np.transpose(dml_short.all_coef - self.all_coef) - delta_theta = np.median(all_delta_theta, axis=0) - - # degree of adversity - var_g = var_y_residuals_short - var_y_residuals_long - var_riesz = nu2_long - nu2_short - denom = np.sqrt(np.multiply(var_g, var_riesz), out=np.zeros_like(var_g), where=(var_g > 0) & (var_riesz > 0)) - rho_sign = np.sign(all_delta_theta) - rho_values = np.clip(np.divide(np.absolute(all_delta_theta), - denom, - out=np.ones_like(all_delta_theta), - where=denom != 0), - 0.0, 1.0) - all_rho_benchmark = np.multiply(rho_values, rho_sign) - rho_benchmark = np.median(all_rho_benchmark, axis=0) - benchmark_dict = { - "cf_y": cf_y_benchmark, - "cf_d": cf_d_benchmark, - "rho": rho_benchmark, - "delta_theta": delta_theta, 
- } + benchmark_dict = gain_statistics(dml_long=self, dml_short=dml_short) df_benchmark = pd.DataFrame(benchmark_dict, index=self._dml_data.d_cols) return df_benchmark diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py new file mode 100644 index 00000000..849a7755 --- /dev/null +++ b/doubleml/utils/gain_statistics.py @@ -0,0 +1,85 @@ +import numpy as np + + +def gain_statistics(dml_long, dml_short): + """ + Compute gain statistics as benchmark values for sensitivity parameters cf_d and cf_y. + + Parameters: + ---------- + + dml_long : :class:`doubleml.DoubleML` model including all observed confounders + dml_short : :class:`doubleml.DoubleML` model that excludes one or several benchmark confounders + + + Returns: + -------- + Benchmarking dictionary (dict) with values for cf_d, cf_y, rho, and delta_theta. + + """ + expected_keys = ['sigma2', 'nu2'] + if not all(key in dml_long.sensitivity_elements.keys() for key in expected_keys): + raise ValueError("dml_long does not contain the necessary sensitivity elements. " + "Required keys are: " + str(expected_keys)) + if not all(key in dml_short.sensitivity_elements.keys() for key in expected_keys): + raise ValueError("dml_short does not contain the necessary sensitivity elements. " + "Required keys are: " + str(expected_keys)) + + for key in expected_keys: + if not isinstance(dml_long.sensitivity_elements[key], np.ndarray): + raise TypeError("dml_long does not contain the necessary sensitivity elements. " + f"Expected numpy.ndarray for key {key}.") + if not isinstance(dml_short.sensitivity_elements[key], np.ndarray): + raise TypeError("dml_short does not contain the necessary sensitivity elements. " + f"Expected numpy.ndarray for key {key}.") + if not np.array_equal(dml_long.sensitivity_elements[key].shape, dml_short.sensitivity_elements[key].shape): + raise ValueError("dml_long and dml_short do not contain the same shape of sensitivity elements. 
" + "Shapes of " + key + " are: " + str(dml_long.sensitivity_elements[key].shape) + + " and " + str(dml_short.sensitivity_elements[key].shape)) + + if not isinstance(dml_long.all_coef, np.ndarray): + raise TypeError("dml_long.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") + if not isinstance(dml_short.all_coef, np.ndarray): + raise TypeError("dml_short.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") + + # save elements for readability + var_y = np.var(dml_long._dml_data.y) + var_y_residuals_long = np.squeeze(dml_long.sensitivity_elements['sigma2'], axis=0) + nu2_long = np.squeeze(dml_long.sensitivity_elements['nu2'], axis=0) + var_y_residuals_short = np.squeeze(dml_short.sensitivity_elements['sigma2'], axis=0) + nu2_short = np.squeeze(dml_short.sensitivity_elements['nu2'], axis=0) + + # compute nonparametric R2 + R2_y_long = 1.0 - np.divide(var_y_residuals_long, var_y) + R2_y_short = 1.0 - np.divide(var_y_residuals_short, var_y) + R2_riesz = np.divide(nu2_short, nu2_long) + + # Gain statistics + all_cf_y_benchmark = np.clip(np.divide((R2_y_long - R2_y_short), (1.0 - R2_y_long)), 0, 1) + all_cf_d_benchmark = np.clip(np.divide((1.0 - R2_riesz), R2_riesz), 0, 1) + cf_y_benchmark = np.median(all_cf_y_benchmark, axis=0) + cf_d_benchmark = np.median(all_cf_d_benchmark, axis=0) + + # change in estimates (slightly different to paper) + all_delta_theta = np.transpose(dml_short.all_coef - dml_long.all_coef) + delta_theta = np.median(all_delta_theta, axis=0) + + # degree of adversity + var_g = var_y_residuals_short - var_y_residuals_long + var_riesz = nu2_long - nu2_short + denom = np.sqrt(np.multiply(var_g, var_riesz), out=np.zeros_like(var_g), where=(var_g > 0) & (var_riesz > 0)) + rho_sign = np.sign(all_delta_theta) + rho_values = np.clip(np.divide(np.absolute(all_delta_theta), + denom, + out=np.ones_like(all_delta_theta), + where=denom != 0), + 0.0, 1.0) + all_rho_benchmark = np.multiply(rho_values, 
rho_sign) + rho_benchmark = np.median(all_rho_benchmark, axis=0) + benchmark_dict = { + "cf_y": cf_y_benchmark, + "cf_d": cf_d_benchmark, + "rho": rho_benchmark, + "delta_theta": delta_theta, + } + return benchmark_dict From f303dac8b7e7261a5ae37db6e1531ed0d96c080b Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 30 Jan 2024 15:42:01 +0100 Subject: [PATCH 116/134] add exceptions and tests for gain statistics --- doubleml/utils/gain_statistics.py | 20 ++ .../tests/test_exceptions_gain_statistics.py | 177 ++++++++++++++++++ 2 files changed, 197 insertions(+) create mode 100644 doubleml/utils/tests/test_exceptions_gain_statistics.py diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py index 849a7755..af5808cc 100644 --- a/doubleml/utils/gain_statistics.py +++ b/doubleml/utils/gain_statistics.py @@ -17,10 +17,16 @@ def gain_statistics(dml_long, dml_short): Benchmarking dictionary (dict) with values for cf_d, cf_y, rho, and delta_theta. """ + if not isinstance(dml_long.sensitivity_elements, dict): + raise TypeError("dml_long does not contain the necessary sensitivity elements. " + "Expected dict for dml_long.sensitivity_elements.") expected_keys = ['sigma2', 'nu2'] if not all(key in dml_long.sensitivity_elements.keys() for key in expected_keys): raise ValueError("dml_long does not contain the necessary sensitivity elements. " "Required keys are: " + str(expected_keys)) + if not isinstance(dml_short.sensitivity_elements, dict): + raise TypeError("dml_short does not contain the necessary sensitivity elements. " + "Expected dict for dml_short.sensitivity_elements.") if not all(key in dml_short.sensitivity_elements.keys() for key in expected_keys): raise ValueError("dml_short does not contain the necessary sensitivity elements. 
" "Required keys are: " + str(expected_keys)) @@ -32,6 +38,12 @@ def gain_statistics(dml_long, dml_short): if not isinstance(dml_short.sensitivity_elements[key], np.ndarray): raise TypeError("dml_short does not contain the necessary sensitivity elements. " f"Expected numpy.ndarray for key {key}.") + if len(dml_long.sensitivity_elements[key].shape) != 3 or dml_long.sensitivity_elements[key].shape[0] != 1: + raise ValueError("dml_long does not contain the necessary sensitivity elements. " + f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.") + if len(dml_short.sensitivity_elements[key].shape) != 3 or dml_short.sensitivity_elements[key].shape[0] != 1: + raise ValueError("dml_short does not contain the necessary sensitivity elements. " + f"Expected 3 dimensions of shape (1, n_coef, n_rep) for key {key}.") if not np.array_equal(dml_long.sensitivity_elements[key].shape, dml_short.sensitivity_elements[key].shape): raise ValueError("dml_long and dml_short do not contain the same shape of sensitivity elements. " "Shapes of " + key + " are: " + str(dml_long.sensitivity_elements[key].shape) + @@ -42,6 +54,14 @@ def gain_statistics(dml_long, dml_short): if not isinstance(dml_short.all_coef, np.ndarray): raise TypeError("dml_short.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") + expected_shape = (dml_long.sensitivity_elements['sigma2'].shape[1], dml_long.sensitivity_elements['sigma2'].shape[2]) + if dml_long.all_coef.shape != expected_shape: + raise ValueError("dml_long.all_coef does not contain the necessary coefficients. Expected shape: " + + str(expected_shape)) + if dml_short.all_coef.shape != expected_shape: + raise ValueError("dml_short.all_coef does not contain the necessary coefficients. 
Expected shape: " + + str(expected_shape)) + # save elements for readability var_y = np.var(dml_long._dml_data.y) var_y_residuals_long = np.squeeze(dml_long.sensitivity_elements['sigma2'], axis=0) diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py new file mode 100644 index 00000000..af370c4b --- /dev/null +++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py @@ -0,0 +1,177 @@ +import pytest +import numpy as np + +from doubleml.utils.gain_statistics import gain_statistics + + +class test_dml_class(): + def __init__(self, sensitivity_elements, all_coef): + self.sensitivity_elements = sensitivity_elements + self.all_coef = all_coef + + +n_obs = 1 +n_rep = 5 +n_coef = 3 + + +@pytest.mark.ci +def test_doubleml_exception_data(): + dml_correct = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + + # incorrect types + dml_incorrect = test_dml_class( + sensitivity_elements=np.random.normal(size=(n_obs, n_coef, n_rep)), + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.sensitivity_elements\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. Expected dict for dml_short\.sensitivity_elements\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. 
Required keys are: \['sigma2', 'nu2'\]" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. Required keys are: \['sigma2', 'nu2'\]" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect type for keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': {}, + 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key sigma2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key sigma2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'nu2': {} + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key nu2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short does not contain the necessary sensitivity elements\. Expected numpy\.ndarray for key nu2\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect shape for keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs + 1, n_coef, n_rep)), + 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = (r"dml_long does not contain the necessary sensitivity elements\. 
" + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = (r"dml_short does not contain the necessary sensitivity elements\. " + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key sigma2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'nu2': np.random.normal(size=(n_obs + 1, n_coef, n_rep)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = (r"dml_long does not contain the necessary sensitivity elements\. " + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = (r"dml_short does not contain the necessary sensitivity elements\. " + r"Expected 3 dimensions of shape \(1, n_coef, n_rep\) for key nu2\.") + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # conflicting shape for keys + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef + 1, n_rep)), + 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " + msg += r"Shapes of sigma2 are: \(1, 4, 5\) and \(1, 3, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. 
" + msg += r"Shapes of sigma2 are: \(1, 3, 5\) and \(1, 4, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'nu2': np.random.normal(size=(n_obs, n_coef + 1, n_rep)) + }, + all_coef=np.random.normal(size=(n_coef, n_rep)) + ) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " + msg += r"Shapes of nu2 are: \(1, 4, 5\) and \(1, 3, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_long and dml_short do not contain the same shape of sensitivity elements\. " + msg += r"Shapes of nu2 are: \(1, 3, 5\) and \(1, 4, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect type for all_coef + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + }, + all_coef={} + ) + msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected numpy\.ndarray\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short\.all_coef does not contain the necessary coefficients\. Expected numpy\.ndarray\." + with pytest.raises(TypeError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) + + # incorrect shape for all_coef + dml_incorrect = test_dml_class( + sensitivity_elements={ + 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + }, + all_coef=np.random.normal(size=(n_coef + 1, n_rep)) + ) + msg = r"dml_long\.all_coef does not contain the necessary coefficients\. 
Expected shape: \(3, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_incorrect, dml_correct) + msg = r"dml_short\.all_coef does not contain the necessary coefficients\. Expected shape: \(3, 5\)" + with pytest.raises(ValueError, match=msg): + _ = gain_statistics(dml_correct, dml_incorrect) From 54a765bfbc0a1d69fad5bdd153755daecaef543c Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Tue, 30 Jan 2024 15:43:19 +0100 Subject: [PATCH 117/134] Update test_exceptions_gain_statistics.py --- doubleml/utils/tests/test_exceptions_gain_statistics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py index af370c4b..dc451409 100644 --- a/doubleml/utils/tests/test_exceptions_gain_statistics.py +++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py @@ -33,7 +33,8 @@ def test_doubleml_exception_data(): msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.sensitivity_elements\." with pytest.raises(TypeError, match=msg): _ = gain_statistics(dml_incorrect, dml_correct) - msg = r"dml_short does not contain the necessary sensitivity elements\. Expected dict for dml_short\.sensitivity_elements\." + msg = r"dml_short does not contain the necessary sensitivity elements\. " + msg += r"Expected dict for dml_short\.sensitivity_elements\." 
with pytest.raises(TypeError, match=msg): _ = gain_statistics(dml_correct, dml_incorrect) From 3ff6d3874a2f1e65eb83f907e2564f4c67268bc4 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 07:45:06 +0100 Subject: [PATCH 118/134] fix shape test for gain statistic --- doubleml/utils/gain_statistics.py | 2 +- .../tests/test_exceptions_gain_statistics.py | 44 +++++++++---------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py index af5808cc..7a35db4f 100644 --- a/doubleml/utils/gain_statistics.py +++ b/doubleml/utils/gain_statistics.py @@ -54,7 +54,7 @@ def gain_statistics(dml_long, dml_short): if not isinstance(dml_short.all_coef, np.ndarray): raise TypeError("dml_short.all_coef does not contain the necessary coefficients. Expected numpy.ndarray.") - expected_shape = (dml_long.sensitivity_elements['sigma2'].shape[1], dml_long.sensitivity_elements['sigma2'].shape[2]) + expected_shape = (dml_long.sensitivity_elements['sigma2'].shape[2], dml_long.sensitivity_elements['sigma2'].shape[1]) if dml_long.all_coef.shape != expected_shape: raise ValueError("dml_long.all_coef does not contain the necessary coefficients. 
Expected shape: " + str(expected_shape)) diff --git a/doubleml/utils/tests/test_exceptions_gain_statistics.py b/doubleml/utils/tests/test_exceptions_gain_statistics.py index dc451409..805a84ed 100644 --- a/doubleml/utils/tests/test_exceptions_gain_statistics.py +++ b/doubleml/utils/tests/test_exceptions_gain_statistics.py @@ -11,23 +11,23 @@ def __init__(self, sensitivity_elements, all_coef): n_obs = 1 -n_rep = 5 -n_coef = 3 +n_rep = 3 +n_coef = 5 @pytest.mark.ci def test_doubleml_exception_data(): dml_correct = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), - 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, all_coef=np.random.normal(size=(n_coef, n_rep)) ) # incorrect types dml_incorrect = test_dml_class( - sensitivity_elements=np.random.normal(size=(n_obs, n_coef, n_rep)), + sensitivity_elements=np.random.normal(size=(n_obs, n_rep, n_coef)), all_coef=np.random.normal(size=(n_coef, n_rep)) ) msg = r"dml_long does not contain the necessary sensitivity elements\. Expected dict for dml_long\.sensitivity_elements\." 
@@ -41,7 +41,7 @@ def test_doubleml_exception_data(): # incorrect keys dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), }, all_coef=np.random.normal(size=(n_coef, n_rep)) ) @@ -56,7 +56,7 @@ def test_doubleml_exception_data(): dml_incorrect = test_dml_class( sensitivity_elements={ 'sigma2': {}, - 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, all_coef=np.random.normal(size=(n_coef, n_rep)) ) @@ -69,7 +69,7 @@ def test_doubleml_exception_data(): dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), 'nu2': {} }, all_coef=np.random.normal(size=(n_coef, n_rep)) @@ -84,8 +84,8 @@ def test_doubleml_exception_data(): # incorrect shape for keys dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs + 1, n_coef, n_rep)), - 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + 'sigma2': np.random.normal(size=(n_obs + 1, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, all_coef=np.random.normal(size=(n_coef, n_rep)) ) @@ -100,8 +100,8 @@ def test_doubleml_exception_data(): dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), - 'nu2': np.random.normal(size=(n_obs + 1, n_coef, n_rep)) + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs + 1, n_rep, n_coef)) }, all_coef=np.random.normal(size=(n_coef, n_rep)) ) @@ -117,8 +117,8 @@ def test_doubleml_exception_data(): # conflicting shape for keys dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef + 1, n_rep)), - 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + 'sigma2': np.random.normal(size=(n_obs, 
n_rep + 1, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, all_coef=np.random.normal(size=(n_coef, n_rep)) ) @@ -133,8 +133,8 @@ def test_doubleml_exception_data(): dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), - 'nu2': np.random.normal(size=(n_obs, n_coef + 1, n_rep)) + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep + 1, n_coef)) }, all_coef=np.random.normal(size=(n_coef, n_rep)) ) @@ -150,8 +150,8 @@ def test_doubleml_exception_data(): # incorrect type for all_coef dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), - 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, all_coef={} ) @@ -165,14 +165,14 @@ def test_doubleml_exception_data(): # incorrect shape for all_coef dml_incorrect = test_dml_class( sensitivity_elements={ - 'sigma2': np.random.normal(size=(n_obs, n_coef, n_rep)), - 'nu2': np.random.normal(size=(n_obs, n_coef, n_rep)) + 'sigma2': np.random.normal(size=(n_obs, n_rep, n_coef)), + 'nu2': np.random.normal(size=(n_obs, n_rep, n_coef)) }, all_coef=np.random.normal(size=(n_coef + 1, n_rep)) ) - msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected shape: \(3, 5\)" + msg = r"dml_long\.all_coef does not contain the necessary coefficients\. Expected shape: \(5, 3\)" with pytest.raises(ValueError, match=msg): _ = gain_statistics(dml_incorrect, dml_correct) - msg = r"dml_short\.all_coef does not contain the necessary coefficients\. Expected shape: \(3, 5\)" + msg = r"dml_short\.all_coef does not contain the necessary coefficients\. 
Expected shape: \(5, 3\)" with pytest.raises(ValueError, match=msg): _ = gain_statistics(dml_correct, dml_incorrect) From ab135cc7a53c4df7e85843a3a0bdced0aecabcc7 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 08:05:19 +0100 Subject: [PATCH 119/134] add exceptions for ATTE weights --- doubleml/_utils_checks.py | 19 ++++++++++++++++--- doubleml/tests/test_doubleml_exceptions.py | 14 ++++++++++---- 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/doubleml/_utils_checks.py b/doubleml/_utils_checks.py index 39be143d..834b904f 100644 --- a/doubleml/_utils_checks.py +++ b/doubleml/_utils_checks.py @@ -230,12 +230,13 @@ def _check_benchmarks(benchmarks): def _check_weights(weights, score, n_obs, n_rep): if weights is not None: - if score != "ATE": - raise NotImplementedError("weights can only be set for score type 'ATE'. " - f"{score} was passed.") + + # check general type if (not isinstance(weights, np.ndarray)) and (not isinstance(weights, dict)): raise TypeError("weights must be a numpy array or dictionary. " f"weights of type {str(type(weights))} was passed.") + + # check shape if isinstance(weights, np.ndarray): if (weights.ndim != 1) or weights.shape[0] != n_obs: raise ValueError(f"weights must have shape ({n_obs},). " @@ -245,7 +246,19 @@ def _check_weights(weights, score, n_obs, n_rep): if weights.sum() == 0: raise ValueError("At least one weight must be non-zero.") + # check special form for ATTE score + if score == "ATTE": + if not isinstance(weights, np.ndarray): + raise TypeError("weights must be a numpy array for ATTE score. 
" + f"weights of type {str(type(weights))} was passed.") + + is_binary = np.all((np.power(weights, 2) - weights) == 0) + if not is_binary: + raise ValueError("weights must be binary for ATTE score.") + + # check general form for ATE score if isinstance(weights, dict): + assert score == "ATE" expected_keys = ["weights", "weights_bar"] if not set(weights.keys()) == set(expected_keys): raise ValueError(f"weights must have keys {expected_keys}. " diff --git a/doubleml/tests/test_doubleml_exceptions.py b/doubleml/tests/test_doubleml_exceptions.py index dc723df3..0c705d3c 100644 --- a/doubleml/tests/test_doubleml_exceptions.py +++ b/doubleml/tests/test_doubleml_exceptions.py @@ -428,16 +428,17 @@ def test_doubleml_exception_trimming_rule(): @pytest.mark.ci def test_doubleml_exception_weights(): - msg = "weights can only be set for score type 'ATE'. ATTE was passed." - with pytest.raises(NotImplementedError, match=msg): - _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), - score='ATTE', weights=np.ones_like(dml_data_irm.d)) + msg = "weights must be a numpy array or dictionary. weights of type was passed." with pytest.raises(TypeError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights=1) msg = r"weights must have keys \['weights', 'weights_bar'\]. keys dict_keys\(\['d'\]\) were passed." with pytest.raises(ValueError, match=msg): _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), weights={'d': [1, 2, 3]}) + msg = "weights must be a numpy array for ATTE score. weights of type was passed." + with pytest.raises(TypeError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + score='ATTE', weights={'weights': np.ones_like(dml_data_irm.d)}) # shape checks msg = rf"weights must have shape \({n},\). weights of shape \(1,\) was passed." 
@@ -485,6 +486,11 @@ def test_doubleml_exception_weights(): weights={'weights': np.ones((dml_data_irm.d.shape[0], )), 'weights_bar': np.zeros((dml_data_irm.d.shape[0], 1))}) + msg = "weights must be binary for ATTE score." + with pytest.raises(ValueError, match=msg): + _ = DoubleMLIRM(dml_data_irm, Lasso(), LogisticRegression(), + score='ATTE', weights=np.random.choice([0, 0.2], dml_data_irm.d.shape[0])) + @pytest.mark.ci def test_doubleml_exception_quantiles(): From 0e793c8a888b0b6691484f5521fb35e9bc8968c8 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 08:23:56 +0100 Subject: [PATCH 120/134] update weights for ATTE --- doubleml/double_ml_irm.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 21923c0b..a2bcf0e1 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -219,11 +219,26 @@ def _initialize_weights(self, weights): self._weights = weights def _get_weights(self): - weights = self._weights['weights'] - if 'weights_bar' not in self._weights.keys(): - weights_bar = self._weights['weights'] + # standard case for ATE + if self.score == 'ATE': + weights = self._weights['weights'] + if 'weights_bar' not in self._weights.keys(): + weights_bar = self._weights['weights'] + else: + weights_bar = self._weights['weights_bar'][:, self._i_rep] else: - weights_bar = self._weights['weights_bar'][:, self._i_rep] + # special case for ATTE + assert self.score == 'ATTE' + subgroup = self._weights['weights'] * self._dml_data.d + subgroup_probability = np.mean(subgroup) + weights = np.divide(subgroup, subgroup_probability) + + treatment_name = self._dml_data.d_cols[0] # only one treatment variable + m_hat = self.predictions[treatment_name]['ml_m'] + weights_bar = np.divide( + np.multiply(m_hat, subgroup.reshape(-1, 1)), + subgroup_probability) + return weights, weights_bar def 
_check_data(self, obj_dml_data): From 851fadf040cd5aaff9df59c1a1313067c5a55c75 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 09:27:40 +0100 Subject: [PATCH 121/134] fix weights --- doubleml/double_ml_irm.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index a2bcf0e1..4c9a6be2 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -218,7 +218,7 @@ def _initialize_weights(self, weights): assert isinstance(weights, dict) self._weights = weights - def _get_weights(self): + def _get_weights(self, m_hat=None): # standard case for ATE if self.score == 'ATE': weights = self._weights['weights'] @@ -229,14 +229,13 @@ def _get_weights(self): else: # special case for ATTE assert self.score == 'ATTE' + assert m_hat is not None subgroup = self._weights['weights'] * self._dml_data.d subgroup_probability = np.mean(subgroup) weights = np.divide(subgroup, subgroup_probability) - treatment_name = self._dml_data.d_cols[0] # only one treatment variable - m_hat = self.predictions[treatment_name]['ml_m'] weights_bar = np.divide( - np.multiply(m_hat, subgroup.reshape(-1, 1)), + np.multiply(m_hat, self._weights['weights']), subgroup_probability) return weights, weights_bar @@ -372,7 +371,7 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): if isinstance(self.score, str): if self.score == 'ATE': - weights, weights_bar = self._get_weights() + weights, weights_bar = self._get_weights(m_hat=m_hat_adj) psi_b = weights * (g_hat1 - g_hat0) \ + weights_bar * ( np.divide(np.multiply(d, u_hat1), m_hat_adj) @@ -403,7 +402,7 @@ def _sensitivity_element_est(self, preds): # use weights make this extendable if self.score == 'ATE': - weights, weights_bar = self._get_weights() + weights, weights_bar = self._get_weights(m_hat=m_hat) else: assert self.score == 'ATTE' weights = np.divide(d, np.mean(d)) From 
cf792f5867fe9a8431b0f2e049ef1c95607076a3 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 09:40:29 +0100 Subject: [PATCH 122/134] add unit test for weights one vs old score --- doubleml/tests/test_irm_weighted_scores.py | 117 +++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 doubleml/tests/test_irm_weighted_scores.py diff --git a/doubleml/tests/test_irm_weighted_scores.py b/doubleml/tests/test_irm_weighted_scores.py new file mode 100644 index 00000000..83fa43ea --- /dev/null +++ b/doubleml/tests/test_irm_weighted_scores.py @@ -0,0 +1,117 @@ +import pytest +import numpy as np + +from sklearn.base import clone +from sklearn.linear_model import LogisticRegression, LinearRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor + +import doubleml as dml + +from .._utils import _normalize_ipw + + +def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw): + # fraction of treated for ATTE + p_hat = None + if score == 'ATTE': + p_hat = np.mean(d) + + if normalize_ipw: + m_hat = _normalize_ipw(m_hat, d) + + # compute residuals + u_hat0 = y - g_hat0 + u_hat1 = None + if score == 'ATE': + u_hat1 = y - g_hat1 + + if isinstance(score, str): + if score == 'ATE': + psi_b = g_hat1 - g_hat0 \ + + np.divide(np.multiply(d, u_hat1), m_hat) \ + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat) + psi_a = np.full_like(m_hat, -1.0) + else: + assert score == 'ATTE' + psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \ + - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)), + np.multiply(p_hat, (1.0 - m_hat))) + psi_a = - np.divide(d, p_hat) + + return psi_a, psi_b + + +@pytest.fixture(scope='module', + params=[[LinearRegression(), + LogisticRegression(solver='lbfgs', max_iter=250)], + [RandomForestRegressor(max_depth=5, n_estimators=10, random_state=42), + RandomForestClassifier(max_depth=5, n_estimators=10, random_state=42)]]) +def 
learner(request): + return request.param + + +@pytest.fixture(scope='module', + params=['ATE', 'ATTE']) +def score(request): + return request.param + + +@pytest.fixture(scope='module', + params=[False, True]) +def normalize_ipw(request): + return request.param + + +@pytest.fixture(scope='module', + params=[0.2, 0.15]) +def trimming_threshold(request): + return request.param + + +@pytest.fixture(scope='module') +def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_ipw, trimming_threshold): + n_folds = 2 + + # collect data + (x, y, d) = generate_data_irm + obj_dml_data = dml.DoubleMLData.from_arrays(x, y, d) + + # Set machine learning methods for m & g + ml_g = clone(learner[0]) + ml_m = clone(learner[1]) + + np.random.seed(3141) + dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, + ml_g, ml_m, + n_folds, + score=score, + normalize_ipw=normalize_ipw, + trimming_threshold=trimming_threshold) + dml_irm_obj.fit() + + # old score + psi_a_old, psi_b_old = old_score_elements( + y=y, + d=d, + g_hat0=np.squeeze(dml_irm_obj.predictions['ml_g0']), + g_hat1=np.squeeze(dml_irm_obj.predictions['ml_g1']), + m_hat=np.squeeze(dml_irm_obj.predictions['ml_m']), + score=score, + normalize_ipw=normalize_ipw + ) + + result_dict = { + 'psi_a': np.squeeze(dml_irm_obj.psi_elements['psi_a']), + 'psi_b': np.squeeze(dml_irm_obj.psi_elements['psi_b']), + 'psi_a_old': psi_a_old, + 'psi_b_old': psi_b_old, + } + return result_dict + + +@pytest.mark.ci +def test_irm_old_vs_weighted_score(old_vs_weighted_score_fixture): + assert np.allclose(old_vs_weighted_score_fixture['psi_a'], + old_vs_weighted_score_fixture['psi_a_old']) + assert np.allclose(old_vs_weighted_score_fixture['psi_b'], + old_vs_weighted_score_fixture['psi_b_old']) From a3cc1953e1f3d6215d12f7895e7ba98a5b657a25 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 09:53:22 +0100 Subject: [PATCH 123/134] add tests on coef and psi elements --- 
doubleml/tests/test_irm_weighted_scores.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/doubleml/tests/test_irm_weighted_scores.py b/doubleml/tests/test_irm_weighted_scores.py index 83fa43ea..412b3d51 100644 --- a/doubleml/tests/test_irm_weighted_scores.py +++ b/doubleml/tests/test_irm_weighted_scores.py @@ -100,18 +100,32 @@ def old_vs_weighted_score_fixture(generate_data_irm, learner, score, normalize_i normalize_ipw=normalize_ipw ) + old_coef = -np.mean(psi_b_old) / np.mean(psi_a_old) + result_dict = { 'psi_a': np.squeeze(dml_irm_obj.psi_elements['psi_a']), 'psi_b': np.squeeze(dml_irm_obj.psi_elements['psi_b']), 'psi_a_old': psi_a_old, 'psi_b_old': psi_b_old, + 'coef': np.squeeze(dml_irm_obj.coef), + 'old_coef': old_coef, } return result_dict @pytest.mark.ci -def test_irm_old_vs_weighted_score(old_vs_weighted_score_fixture): - assert np.allclose(old_vs_weighted_score_fixture['psi_a'], - old_vs_weighted_score_fixture['psi_a_old']) +def test_irm_old_vs_weighted_score_psi_b(old_vs_weighted_score_fixture): assert np.allclose(old_vs_weighted_score_fixture['psi_b'], old_vs_weighted_score_fixture['psi_b_old']) + + +@pytest.mark.ci +def test_irm_old_vs_weighted_score_psi_a(old_vs_weighted_score_fixture): + assert np.allclose(old_vs_weighted_score_fixture['psi_a'], + old_vs_weighted_score_fixture['psi_a_old']) + + +@pytest.mark.ci +def test_irm_old_vs_weighted_coef(old_vs_weighted_score_fixture): + assert np.allclose(old_vs_weighted_score_fixture['coef'], + old_vs_weighted_score_fixture['old_coef']) From 8da92b1c4e0d7316f1e38adc55addfaf4fba0643 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 09:56:23 +0100 Subject: [PATCH 124/134] update score estimation --- doubleml/double_ml_irm.py | 41 +++++++++++++++------------------------ 1 file changed, 16 insertions(+), 25 deletions(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 
4c9a6be2..89a1387c 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -348,11 +348,6 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): - # fraction of treated for ATTE - p_hat = None - if self.score == 'ATTE': - p_hat = np.mean(d) - m_hat_adj = np.full_like(m_hat, np.nan, dtype='float64') if self.normalize_ipw: if self.dml_procedure == 'dml1': @@ -365,24 +360,21 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): # compute residuals u_hat0 = y - g_hat0 - u_hat1 = None - if self.score == 'ATE': - u_hat1 = y - g_hat1 - - if isinstance(self.score, str): + if self.score == 'ATTE': + g_hat1 = y + u_hat1 = y - g_hat1 + + if (self.score == 'ATE') or (self.score == 'ATTE'): + weights, weights_bar = self._get_weights(m_hat=m_hat_adj) + psi_b = weights * (g_hat1 - g_hat0) \ + + weights_bar * ( + np.divide(np.multiply(d, u_hat1), m_hat_adj) + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat_adj)) if self.score == 'ATE': - weights, weights_bar = self._get_weights(m_hat=m_hat_adj) - psi_b = weights * (g_hat1 - g_hat0) \ - + weights_bar * ( - np.divide(np.multiply(d, u_hat1), m_hat_adj) - - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat_adj)) psi_a = np.full_like(m_hat_adj, -1.0) else: assert self.score == 'ATTE' - psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \ - - np.divide(np.multiply(m_hat_adj, np.multiply(1.0-d, u_hat0)), - np.multiply(p_hat, (1.0 - m_hat_adj))) - psi_a = - np.divide(d, p_hat) + psi_a = -1.0 * np.divide(d, np.mean(d)) else: assert callable(self.score) psi_a, psi_b = self.score(y=y, d=d, @@ -398,15 +390,14 @@ def _sensitivity_element_est(self, preds): m_hat = preds['predictions']['ml_m'] g_hat0 = preds['predictions']['ml_g0'] - g_hat1 = preds['predictions']['ml_g1'] - - # use weights make this extendable if self.score == 'ATE': - weights, weights_bar = self._get_weights(m_hat=m_hat) + g_hat1 = 
preds['predictions']['ml_g1'] else: assert self.score == 'ATTE' - weights = np.divide(d, np.mean(d)) - weights_bar = np.divide(m_hat, np.mean(d)) + g_hat1 = y + + # use weights make this extendable + weights, weights_bar = self._get_weights(m_hat=m_hat) sigma2_score_element = np.square(y - np.multiply(d, g_hat1) - np.multiply(1.0-d, g_hat0)) sigma2 = np.mean(sigma2_score_element) From f1cae38a18f65c24834970e7377c8024fa4344f9 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:30:23 +0100 Subject: [PATCH 125/134] remove g1 estimation for atte --- doubleml/double_ml_irm.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 89a1387c..19cc0dd9 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -291,8 +291,13 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa f'predictions obtained with the ml_g learner {str(self._learner["ml_g"])} are also ' 'observed to be binary with values 0 and 1. 
Make sure that for classifiers ' 'probabilities and not labels are predicted.') + if self.score == 'ATTE': + # skip g_hat1 estimation + g_hat1 = {'preds': None, + 'targets': None, + 'models': None} - if g1_external: + elif g1_external: # use external predictions g_hat1 = {'preds': external_predictions['ml_g1'], 'targets': None, From 301d98fb88483e0dc1c1d5c5534f9484d6aa54d1 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:34:28 +0100 Subject: [PATCH 126/134] Update _utils_irm_manual.py --- doubleml/tests/_utils_irm_manual.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doubleml/tests/_utils_irm_manual.py b/doubleml/tests/_utils_irm_manual.py index b60c1e07..3333550f 100644 --- a/doubleml/tests/_utils_irm_manual.py +++ b/doubleml/tests/_utils_irm_manual.py @@ -298,7 +298,11 @@ def fit_sensitivity_elements_irm(y, d, all_coef, predictions, score, n_rep): m_hat = predictions['ml_m'][:, i_rep, 0] g_hat0 = predictions['ml_g0'][:, i_rep, 0] - g_hat1 = predictions['ml_g1'][:, i_rep, 0] + if score == 'ATE': + g_hat1 = predictions['ml_g1'][:, i_rep, 0] + else: + assert score == 'ATTE' + g_hat1 = y if score == 'ATE': weights = np.ones_like(d) From f57625580b4c0227409a9b788ed4e5c8cf3fc219 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:34:53 +0100 Subject: [PATCH 127/134] adjust binary target check for ATTE score --- doubleml/double_ml_irm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 19cc0dd9..6e3df183 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -310,7 +310,7 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa # adjust target values to consider only compatible subsamples g_hat1['targets'] = _cond_targets(g_hat1['targets'], cond_sample=(d == 1)) - if 
self._dml_data.binary_outcome: + if self._dml_data.binary_outcome & (self.score != 'ATTE'): binary_preds = (type_of_target(g_hat1['preds']) == 'binary') zero_one_preds = np.all((np.power(g_hat1['preds'], 2) - g_hat1['preds']) == 0) if binary_preds & zero_one_preds: From 7afc4b803ec96a14ddac6458fb16a77891672006 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Wed, 31 Jan 2024 10:40:21 +0100 Subject: [PATCH 128/134] Update test_irm_weighted_scores.py --- doubleml/tests/test_irm_weighted_scores.py | 23 +++++++++++----------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/doubleml/tests/test_irm_weighted_scores.py b/doubleml/tests/test_irm_weighted_scores.py index 412b3d51..65bbe336 100644 --- a/doubleml/tests/test_irm_weighted_scores.py +++ b/doubleml/tests/test_irm_weighted_scores.py @@ -25,18 +25,17 @@ def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw): if score == 'ATE': u_hat1 = y - g_hat1 - if isinstance(score, str): - if score == 'ATE': - psi_b = g_hat1 - g_hat0 \ - + np.divide(np.multiply(d, u_hat1), m_hat) \ - - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat) - psi_a = np.full_like(m_hat, -1.0) - else: - assert score == 'ATTE' - psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \ - - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)), - np.multiply(p_hat, (1.0 - m_hat))) - psi_a = - np.divide(d, p_hat) + if score == 'ATE': + psi_b = g_hat1 - g_hat0 \ + + np.divide(np.multiply(d, u_hat1), m_hat) \ + - np.divide(np.multiply(1.0-d, u_hat0), 1.0 - m_hat) + psi_a = np.full_like(m_hat, -1.0) + else: + assert score == 'ATTE' + psi_b = np.divide(np.multiply(d, u_hat0), p_hat) \ + - np.divide(np.multiply(m_hat, np.multiply(1.0-d, u_hat0)), + np.multiply(p_hat, (1.0 - m_hat))) + psi_a = - np.divide(d, p_hat) return psi_a, psi_b From 97bd77e76bedd53491d07632d6dbcf83b06231e0 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: 
Wed, 31 Jan 2024 10:46:33 +0100 Subject: [PATCH 129/134] Update test_irm_weighted_scores.py --- doubleml/tests/test_irm_weighted_scores.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doubleml/tests/test_irm_weighted_scores.py b/doubleml/tests/test_irm_weighted_scores.py index 65bbe336..0994f10f 100644 --- a/doubleml/tests/test_irm_weighted_scores.py +++ b/doubleml/tests/test_irm_weighted_scores.py @@ -25,6 +25,8 @@ def old_score_elements(y, d, g_hat0, g_hat1, m_hat, score, normalize_ipw): if score == 'ATE': u_hat1 = y - g_hat1 + psi_a = np.full_like(y, np.nan) + psi_b = np.full_like(y, np.nan) if score == 'ATE': psi_b = g_hat1 - g_hat0 \ + np.divide(np.multiply(d, u_hat1), m_hat) \ From e645441cf0fda1a3438fb9bb706b02c6f52e09c9 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Feb 2024 10:25:51 +0100 Subject: [PATCH 130/134] update psi_a for ATTE score --- doubleml/double_ml_irm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doubleml/double_ml_irm.py b/doubleml/double_ml_irm.py index 6e3df183..84009533 100644 --- a/doubleml/double_ml_irm.py +++ b/doubleml/double_ml_irm.py @@ -379,7 +379,7 @@ def _score_elements(self, y, d, g_hat0, g_hat1, m_hat, smpls): psi_a = np.full_like(m_hat_adj, -1.0) else: assert self.score == 'ATTE' - psi_a = -1.0 * np.divide(d, np.mean(d)) + psi_a = -1.0 * weights else: assert callable(self.score) psi_a, psi_b = self.score(y=y, d=d, From d375cc5ac0af9c9231423964a19c12caee2d2a34 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Feb 2024 13:23:49 +0100 Subject: [PATCH 131/134] add gain_statistics to utils init --- doubleml/utils/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py index 68b9d377..3bdd77cc 100644 --- a/doubleml/utils/__init__.py +++ b/doubleml/utils/__init__.py @@ -1,7 +1,9 @@ from .dummy_learners import DMLDummyRegressor 
from .dummy_learners import DMLDummyClassifier +from .gain_statistics import gain_statistics __all__ = [ "DMLDummyRegressor", "DMLDummyClassifier", + "gain_statistics" ] From 229a6be88031a6eb0ebdbc8ce9d74f4be56340d9 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:46:48 +0100 Subject: [PATCH 132/134] update dummy learner documentation --- doubleml/utils/dummy_learners.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 62127ff7..928b0f2e 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -5,10 +5,12 @@ class DMLDummyRegressor(BaseEstimator): """ A dummy regressor that raises an AttributeError when attempting to access its fit, predict, or set_params methods. + Attributes ---------- _estimator_type : str Type of the estimator, set to "regressor". + Methods ------- fit(*args) @@ -35,10 +37,12 @@ class DMLDummyClassifier(BaseEstimator): """ A dummy classifier that raises an AttributeError when attempting to access its fit, predict, set_params, or predict_proba methods. + Attributes ---------- _estimator_type : str Type of the estimator, set to "classifier". + Methods ------- fit(*args) From 0bbd655ca48600d508a83b83882ee6b4cbf38d06 Mon Sep 17 00:00:00 2001 From: SvenKlaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:52:01 +0100 Subject: [PATCH 133/134] fix predictions docstring --- doubleml/double_ml.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index f33e4c48..12050d6a 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -272,7 +272,8 @@ def params_names(self): @property def predictions(self): """ - The predictions of the nuisance models with shape ``(n_obs, n_rep, n_coefs)``. + The predictions of the nuisance models in the form of a dictionary. 
+ + Each key refers to a nuisance element with an array of values of shape ``(n_obs, n_rep, n_coefs)``. """ return self._predictions From 8431daf98e0debbc6ed5d5756216239591504ae7 Mon Sep 17 00:00:00 2001 From: Sven Klaassen <47529404+SvenKlaassen@users.noreply.github.com> Date: Fri, 2 Feb 2024 11:16:50 +0100 Subject: [PATCH 134/134] fix gain statistics and dummy learner docstrings --- doubleml/utils/dummy_learners.py | 54 +++++++++++++++++-------------- doubleml/utils/gain_statistics.py | 16 +++++---- 2 files changed, 39 insertions(+), 31 deletions(-) diff --git a/doubleml/utils/dummy_learners.py b/doubleml/utils/dummy_learners.py index 928b0f2e..129fd18d 100644 --- a/doubleml/utils/dummy_learners.py +++ b/doubleml/utils/dummy_learners.py @@ -6,30 +6,32 @@ class DMLDummyRegressor(BaseEstimator): A dummy regressor that raises an AttributeError when attempting to access its fit, predict, or set_params methods. - Attributes + Parameters ---------- - _estimator_type : str - Type of the estimator, set to "regressor". - Methods - ------- - fit(*args) - Raises AttributeError: "Accessed fit method of DummyRegressor!" - predict(*args) - Raises AttributeError: "Accessed predict method of DummyRegressor!" - set_params(*args) - Raises AttributeError: "Accessed set_params method of DummyRegressor!" """ _estimator_type = "regressor" def fit(*args): + """ + Raises AttributeError: "Accessed fit method of DummyRegressor!" + """ + raise AttributeError("Accessed fit method of DMLDummyRegressor!") def predict(*args): + """ + Raises AttributeError: "Accessed predict method of DummyRegressor!" + """ + raise AttributeError("Accessed predict method of DMLDummyRegressor!") def set_params(*args): + """ + Raises AttributeError: "Accessed set_params method of DummyRegressor!" 
+ """ + raise AttributeError("Accessed set_params method of DMLDummyRegressor!") @@ -38,33 +40,37 @@ class DMLDummyClassifier(BaseEstimator): A dummy classifier that raises an AttributeError when attempting to access its fit, predict, set_params, or predict_proba methods. - Attributes + Parameters ---------- - _estimator_type : str - Type of the estimator, set to "classifier". - Methods - ------- - fit(*args) - Raises AttributeError: "Accessed fit method of DummyClassifier!" - predict(*args) - Raises AttributeError: "Accessed predict method of DummyClassifier!" - set_params(*args) - Raises AttributeError: "Accessed set_params method of DummyClassifier!" - predict_proba(*args, **kwargs) - Raises AttributeError: "Accessed predict_proba method of DummyClassifier!" """ _estimator_type = "classifier" def fit(*args): + """ + Raises AttributeError: "Accessed fit method of DummyClassifier!" + """ + raise AttributeError("Accessed fit method of DMLDummyClassifier!") def predict(*args): + """ + Raises AttributeError: "Accessed predict method of DummyClassifier!" + """ + raise AttributeError("Accessed predict method of DMLDummyClassifier!") def set_params(*args): + """ + Raises AttributeError: "Accessed set_params method of DummyClassifier!" + """ + raise AttributeError("Accessed set_params method of DMLDummyClassifier!") def predict_proba(*args, **kwargs): + """ + Raises AttributeError: "Accessed predict_proba method of DummyClassifier!" + """ + raise AttributeError("Accessed predict_proba method of DMLDummyClassifier!") diff --git a/doubleml/utils/gain_statistics.py b/doubleml/utils/gain_statistics.py index 7a35db4f..3c50d084 100644 --- a/doubleml/utils/gain_statistics.py +++ b/doubleml/utils/gain_statistics.py @@ -3,19 +3,21 @@ def gain_statistics(dml_long, dml_short): """ - Compute gain statistics as benchmark values for sensitivity parameters cf_d and cf_y. + Compute gain statistics as benchmark values for sensitivity parameters ``cf_d`` and ``cf_y``. 
- Parameters: + Parameters ---------- - dml_long : :class:`doubleml.DoubleML` model including all observed confounders - dml_short : :class:`doubleml.DoubleML` model that excludes one or several benchmark confounders + dml_long : + :class:`doubleml.DoubleML` model including all observed confounders + dml_short : + :class:`doubleml.DoubleML` model that excludes one or several benchmark confounders - Returns: + Returns -------- - Benchmarking dictionary (dict) with values for cf_d, cf_y, rho, and delta_theta. - + benchmark_dict : dict + Benchmarking dictionary (dict) with values for ``cf_d``, ``cf_y``, ``rho``, and ``delta_theta``. """ if not isinstance(dml_long.sensitivity_elements, dict): raise TypeError("dml_long does not contain the necessary sensitivity elements. "