Merge pull request #247 from DoubleML/s-incr-dev

Update workflows and dependencies
DoubleML · Jun 10, 2024 · a0d21e4
2 parents 65eb510 + 13255a6
commit a0d21e4
Show file tree

Hide file tree

Showing 6 changed files with 18 additions and 33 deletions.
diff --git a/.github/workflows/deploy_pkg.yml b/.github/workflows/deploy_pkg.yml
@@ -19,7 +19,7 @@ jobs:
     - name: Install python
       uses: actions/setup-python@v5
       with:
-        python-version: '3.8'
+        python-version: '3.9'
 
     - name: Install dependencies
       run: |

diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -22,12 +22,12 @@ jobs:
     strategy:
       matrix:
         config:
-          - {os: 'ubuntu-latest', python-version: '3.8'}
-          - {os: 'windows-latest', python-version: '3.8'}
-          - {os: 'macOS-latest', python-version: '3.8'}
           - {os: 'ubuntu-latest', python-version: '3.9'}
+          - {os: 'windows-latest', python-version: '3.9'}
+          - {os: 'macOS-latest', python-version: '3.9'}
           - {os: 'ubuntu-latest', python-version: '3.10'}
           - {os: 'ubuntu-latest', python-version: '3.11'}
+          - {os: 'ubuntu-latest', python-version: '3.12'}
 
     steps:
     - uses: actions/checkout@v4
@@ -63,15 +63,15 @@ jobs:
     - name: Test with pytest and coverage
       if: |
         matrix.config.os == 'ubuntu-latest' &&
-        matrix.config.python-version == '3.8'
+        matrix.config.python-version == '3.9'
       run: |
         pip install pytest-cov
         pytest --cov=./ --cov-report=xml
 
     - name: Upload coverage to Codecov
       if: |
         matrix.config.os == 'ubuntu-latest' &&
-        matrix.config.python-version == '3.8'
+        matrix.config.python-version == '3.9'
       uses: codecov/codecov-action@v3
       with:
         file: ./coverage.xml
@@ -80,7 +80,7 @@ jobs:
     - name: Upload coverage to codacy
       if: |
         matrix.config.os == 'ubuntu-latest' &&
-        matrix.config.python-version == '3.8'
+        matrix.config.python-version == '3.9'
       uses: codacy/codacy-coverage-reporter-action@v1
       with:
         project-token: ${{ secrets.CODACY_PROJECT_TOKEN }}

diff --git a/doubleml/__init__.py b/doubleml/__init__.py
@@ -1,4 +1,4 @@
-from pkg_resources import get_distribution
+import importlib.metadata
 
 from .double_ml_framework import concat
 from .double_ml_framework import DoubleMLFramework
@@ -36,4 +36,4 @@
            'DoubleMLPolicyTree',
            'DoubleMLSSM']
 
-__version__ = get_distribution('doubleml').version
+__version__ = importlib.metadata.version('doubleml')
diff --git a/doubleml/datasets.py b/doubleml/datasets.py
@@ -81,12 +81,12 @@ def fetch_bonus(return_type='DoubleMLData', polynomial_features=False):
     doi:`10.1111/ectj.12097 <https://doi.org/10.1111/ectj.12097>`_.
     """
     url = 'https://raw.githubusercontent.com/VC2015/DMLonGitHub/master/penn_jae.dat'
-    raw_data = pd.read_csv(url, delim_whitespace=True)
+    raw_data = pd.read_csv(url, sep='\s+')
 
     ind = (raw_data['tg'] == 0) | (raw_data['tg'] == 4)
     data = raw_data.copy()[ind]
     data.reset_index(inplace=True)
-    data['tg'].replace(4, 1, inplace=True)
+    data['tg'] = data['tg'].replace(4, 1)
     data['inuidur1'] = np.log(data['inuidur1'])
 
     # variable dep as factor (dummy encoding)

diff --git a/doubleml/tests/_utils_dml_cv_predict.py b/doubleml/tests/_utils_dml_cv_predict.py
@@ -8,21 +8,6 @@
 from sklearn.preprocessing import LabelEncoder
 from sklearn.model_selection._validation import _fit_and_predict, _check_is_permutation
 
-# Adapt _fit_and_predict for earlier sklearn versions
-from distutils.version import LooseVersion
-from sklearn import __version__ as sklearn_version
-
-if LooseVersion(sklearn_version) < LooseVersion("1.4.0"):
-    def _fit_and_predict_adapted(estimator, x, y, train, test, fit_params, method):
-        res = _fit_and_predict(estimator, x, y, train, test,
-                               verbose=0,
-                               fit_params=fit_params,
-                               method=method)
-        return res
-else:
-    def _fit_and_predict_adapted(estimator, x, y, train, test, fit_params, method):
-        return _fit_and_predict(estimator, x, y, train, test, fit_params, method)
-
 
 def _dml_cv_predict_ut_version(estimator, x, y, smpls=None,
                                n_jobs=None, est_params=None, method='predict'):
@@ -42,12 +27,12 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None,
         else:
             predictions = np.full(len(y), np.nan)
         if est_params is None:
-            xx = _fit_and_predict_adapted(
+            xx = _fit_and_predict(
                 clone(estimator),
                 x, y, train_index, test_index, fit_params, method)
         else:
             assert isinstance(est_params, dict)
-            xx = _fit_and_predict_adapted(
+            xx = _fit_and_predict(
                 clone(estimator).set_params(**est_params),
                 x, y, train_index, test_index, fit_params, method)
 
@@ -77,20 +62,20 @@ def _dml_cv_predict_ut_version(estimator, x, y, smpls=None,
                         pre_dispatch=pre_dispatch)
     # FixMe: Find a better way to handle the different combinations of parameters and smpls_is_partition
     if est_params is None:
-        prediction_blocks = parallel(delayed(_fit_and_predict_adapted)(
+        prediction_blocks = parallel(delayed(_fit_and_predict)(
             estimator,
             x, y, train_index, test_index, fit_params, method)
                                      for idx, (train_index, test_index) in enumerate(smpls))
     elif isinstance(est_params, dict):
         # if no fold-specific parameters we redirect to the standard method
         # warnings.warn("Using the same (hyper-)parameters for all folds")
-        prediction_blocks = parallel(delayed(_fit_and_predict_adapted)(
+        prediction_blocks = parallel(delayed(_fit_and_predict)(
             clone(estimator).set_params(**est_params),
             x, y, train_index, test_index, fit_params, method)
                                      for idx, (train_index, test_index) in enumerate(smpls))
     else:
         assert len(est_params) == len(smpls), 'provide one parameter setting per fold'
-        prediction_blocks = parallel(delayed(_fit_and_predict_adapted)(
+        prediction_blocks = parallel(delayed(_fit_and_predict)(
             clone(estimator).set_params(**est_params[idx]),
             x, y, train_index, test_index, fit_params, method)
             for idx, (train_index, test_index) in enumerate(smpls))

diff --git a/doubleml/utils/_estimation.py b/doubleml/utils/_estimation.py
@@ -6,7 +6,7 @@
 from sklearn.base import clone
 from sklearn.preprocessing import LabelEncoder
 from sklearn.model_selection import KFold, GridSearchCV, RandomizedSearchCV
-from sklearn.metrics import mean_squared_error
+from sklearn.metrics import root_mean_squared_error
 
 from statsmodels.nonparametric.kde import KDEUnivariate
 
@@ -200,7 +200,7 @@ def _normalize_ipw(propensity, treatment):
 
 def _rmse(y_true, y_pred):
     subset = np.logical_not(np.isnan(y_true))
-    rmse = mean_squared_error(y_true[subset], y_pred[subset], squared=False)
+    rmse = root_mean_squared_error(y_true[subset], y_pred[subset])
     return rmse