diff --git a/python/tests/core/metrics/test_column_metrics.py b/python/tests/core/metrics/test_column_metrics.py
index 4a82d60a0..fe63683ac 100644
--- a/python/tests/core/metrics/test_column_metrics.py
+++ b/python/tests/core/metrics/test_column_metrics.py
@@ -6,6 +6,7 @@
 from whylogs.core.metrics import ColumnCountsMetric, TypeCountersMetric
 from whylogs.core.metrics.metric_components import IntegralComponent
 from whylogs.core.preprocessing import PreprocessedColumn
+from whylogs.core.proto import MetricMessage
 
 INTEGER_TYPES = [int, np.intc, np.uintc, np.int_, np.uint, np.longlong, np.ulonglong]
 FLOAT_TYPES = [float, np.double, np.longdouble, np.float16, np.float64]
@@ -112,3 +113,21 @@ def test_column_counts_with_and_without_nan_field():
     )
     assert full_column_counts.nan.value == 3
     assert full_column_counts.inf.value == 4
+
+
+def test_column_counts_true_count() -> None:
+    counts = ColumnCountsMetric.zero()
+    p_col = PreprocessedColumn.apply(["twelve", 12, True, False, True, "True", None])
+    operation_result = counts.columnar_update(p_col)
+    assert operation_result is not None
+    assert counts.n.value == 7
+    assert counts.true.value == 2
+    assert operation_result.ok
+
+    # This is a serialized ColumnCountsMetric from before adding the true component
+    msg = b"\n\t\n\x03nan\x12\x02\x10\x00\n\n\n\x04null\x12\x02\x10\x01\n\t\n\x03inf\x12\x02\x10\x00\n\x07\n\x01n\x12\x02\x10\x07"
+    buf = MetricMessage()
+    buf.ParseFromString(msg)
+    deserialized = ColumnCountsMetric.from_protobuf(buf)
+    assert deserialized.n.value == 7
+    assert deserialized.true.value == 0
diff --git a/python/whylogs/core/metrics/column_metrics.py b/python/whylogs/core/metrics/column_metrics.py
index fb36a9fce..732dcd0ea 100644
--- a/python/whylogs/core/metrics/column_metrics.py
+++ b/python/whylogs/core/metrics/column_metrics.py
@@ -113,6 +113,7 @@ class ColumnCountsMetric(Metric):
     null: IntegralComponent
     nan: IntegralComponent = field(default_factory=lambda: IntegralComponent(0))
     inf: IntegralComponent = field(default_factory=lambda: IntegralComponent(0))
+    true: IntegralComponent = field(default_factory=lambda: IntegralComponent(0))
 
     @property
     def namespace(self) -> str:
@@ -123,6 +124,7 @@ def columnar_update(self, data: PreprocessedColumn) -> OperationResult:
         null: int = self.null.value
         nan: int = self.nan.value
         inf: int = self.inf.value
+        true: int = self.true.value
         if data.len <= 0:
             return OperationResult.ok(0)
 
@@ -138,15 +140,28 @@ def columnar_update(self, data: PreprocessedColumn) -> OperationResult:
         inf += data.inf_count
         self.inf.set(inf)
 
+        true += data.bool_count_where_true
+        self.true.set(true)
+
         return OperationResult.ok(data.len)
 
     def to_summary_dict(self, cfg: Optional[SummaryConfig] = None) -> Dict[str, Any]:
-        return {"n": self.n.value, "null": self.null.value, "nan": self.nan.value, "inf": self.inf.value}
+        return {
+            "n": self.n.value,
+            "null": self.null.value,
+            "nan": self.nan.value,
+            "inf": self.inf.value,
+            "true": self.true.value,
+        }
 
     @classmethod
     def zero(cls, config: Optional[MetricConfig] = None) -> "ColumnCountsMetric":
         return ColumnCountsMetric(
-            n=IntegralComponent(0), null=IntegralComponent(0), nan=IntegralComponent(0), inf=IntegralComponent(0)
+            n=IntegralComponent(0),
+            null=IntegralComponent(0),
+            nan=IntegralComponent(0),
+            inf=IntegralComponent(0),
+            true=IntegralComponent(0),
         )
 
 