[MAINTENANCE] Fixed warning logs (#118)

* feat: updated libs * feat: disable warnings * fix: handle the case when std is zero * fix: ignore warnings for all envs
provectus · Sep 12, 2023 · fe361b6 · fe361b6
1 parent c33ab86
commit fe361b6
Show file tree

Hide file tree

Showing 4 changed files with 25 additions and 3 deletions.
diff --git a/functions/data_test/data_test/data_test.py b/functions/data_test/data_test/data_test.py
@@ -6,10 +6,12 @@
 import json
 from datasource import prepare_final_ds, get_source_name, get_file_extension
 from loguru import logger
+import warnings
 
 
 def handler(event, context):
     logger.info("Starting data test")
+    _ignore_warnings()
     if os.environ['ENVIRONMENT'] == 'local':
         endpoint_url = (f"http://{os.environ['S3_HOST']}:"
                         f"{os.environ['S3_PORT']}")
@@ -90,3 +92,12 @@ def handler(event, context):
     }
     logger.info("Data test is finished successfully")
     return report
+
+
+def _ignore_warnings():
+    warnings.filterwarnings("ignore", category=FutureWarning)
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+    # Upstream issue tracking this warning:
+    # https://github.com/great-expectations/great_expectations/issues/7338
+    warnings.filterwarnings("ignore", category=UserWarning,
+                            message="`result_format` configured at the Validator-level")
diff --git a/functions/data_test/data_test/profiling.py b/functions/data_test/data_test/profiling.py
@@ -242,9 +242,11 @@ def calculate_z_score(summary):
     std = summary["std"]
     maximum = summary["max"]
     significance_level = 0.005
-    threshold = (maximum - mean) / std
     if std and not np.isnan(std):
+        threshold = (maximum - mean) / std
         return threshold + significance_level
+    else:
+        return None
 
 
 def calculate_q_ranges(summary):

diff --git a/functions/data_test/requirements.txt b/functions/data_test/requirements.txt
@@ -1,11 +1,11 @@
 boto3==1.26.66
 botocore==1.29.66
 importlib-metadata==6.0.0
-great-expectations==0.16.14
+great-expectations==0.17.15
 s3fs==0.4.2
 python-dateutil==2.8.2
 fastparquet==0.8.1
 awswrangler==2.19.0
-ydata-profiling==4.2.0
+ydata-profiling==4.5.1
 jinja2==3.0.3
 loguru==0.7.0
diff --git a/functions/data_test/tests/test_profiling.py b/functions/data_test/tests/test_profiling.py
@@ -205,6 +205,15 @@ def test_expectations_z_score(mean, std, max, threshold, applied, before_and_aft
     assert (expectation_type in str(batch.expectation_suite)) == applied
 
 
+@pytest.mark.parametrize("summary, expected_result", [
+    ({"mean": 5.0, "std": 2.0, "max": 10.0}, 2.505),
+    ({"mean": 5.0, "std": 0.0, "max": 10.0}, None),
+    ({"mean": 5.0, "std": np.nan, "max": 10.0}, None),
+    ({"mean": 5.0, "std": 2.0, "max": 5.0}, 0.005), ])
+def test_calculate_z_score(summary, expected_result):
+    assert calculate_z_score(summary) == expected_result
+
+
 @pytest.mark.parametrize("q1,q2,q3,q4,q5,q6",
                          [(912.85, 996.25, 1100.5, 1204.75, 1288.15, 1309)])
 def test_expectations_quantile(q1, q2, q3, q4, q5, q6, before_and_after_test):