From d67eafcd19cf642ed835fc4fa79d99906d884384 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Thu, 5 Sep 2024 17:26:54 -0400 Subject: [PATCH] [MAINTENANCE] SQLAlchemy 2 typing (#10112) Co-authored-by: Bill Dirks Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Anthony Burdi Co-authored-by: Anthony Burdi Co-authored-by: Nathan Farmer --- great_expectations/compatibility/aws.py | 8 +- .../compatibility/databricks.py | 2 +- great_expectations/compatibility/google.py | 4 +- .../compatibility/sqlalchemy.py | 96 +++++++++---------- great_expectations/core/util.py | 4 +- .../data_context/abstract_data_context.py | 4 +- .../store/database_store_backend.py | 4 +- .../data_context/store/query_store.py | 2 +- great_expectations/data_context/util.py | 2 +- .../datasource/fluent/config.py | 2 +- .../fluent/databricks_sql_datasource.py | 2 +- .../datasource/fluent/pandas_datasource.py | 8 +- .../datasource/fluent/pandas_datasource.pyi | 8 +- .../datasource/fluent/snowflake_datasource.py | 2 +- .../datasource/fluent/sql_datasource.py | 4 +- .../sqlalchemy_data_partitioner.py | 34 +++---- .../sqlalchemy_data_sampler.py | 24 ++--- .../sqlalchemy_execution_engine.py | 62 ++++++------ .../column_distinct_values.py | 8 +- .../column_histogram.py | 8 +- .../column_aggregate_metrics/column_median.py | 4 +- .../column_quantile_values.py | 31 +++--- .../column_value_counts.py | 4 +- .../column_values_between_count.py | 4 +- .../column_values_in_set.py | 2 +- .../column_condition_partial.py | 2 +- ...column_map_condition_auxilliary_methods.py | 8 +- ...n_pair_map_condition_auxilliary_methods.py | 8 +- .../map_condition_auxilliary_methods.py | 52 +++++----- .../multicolumn_condition_partial.py | 2 +- .../multicolumn_function_partial.py | 2 +- ...column_map_condition_auxilliary_methods.py | 10 +- .../metrics/query_metrics/query_column.py | 2 +- .../query_metrics/query_column_pair.py | 2 +- .../query_metrics/query_multiple_columns.py | 2 
+- .../metrics/query_metrics/query_table.py | 2 +- .../query_metrics/query_template_values.py | 2 +- .../table_metrics/table_column_types.py | 6 +- .../metrics/table_metrics/table_head.py | 4 +- .../expectations/metrics/util.py | 56 +++++------ .../attributed_resolved_metrics.py | 2 +- great_expectations/self_check/util.py | 26 ++--- great_expectations/util.py | 16 ++-- reqs/requirements-dev-sqlalchemy.txt | 3 +- reqs/requirements-dev-sqlalchemy2.txt | 17 ++++ requirements-types.txt | 7 +- setup.py | 3 +- tests/conftest.py | 4 +- tests/datasource/fluent/conftest.py | 2 +- .../integration/test_sql_datasources.py | 10 +- .../test_sqlalchemy_execution_engine.py | 10 +- .../integration/cloud/end_to_end/conftest.py | 2 +- .../test_expectations_v3_api.py | 2 +- 53 files changed, 309 insertions(+), 288 deletions(-) create mode 100644 reqs/requirements-dev-sqlalchemy2.txt diff --git a/great_expectations/compatibility/aws.py b/great_expectations/compatibility/aws.py index 0e9c8fa9d248..63d7f8cea4f8 100644 --- a/great_expectations/compatibility/aws.py +++ b/great_expectations/compatibility/aws.py @@ -43,16 +43,16 @@ redshiftdialect = REDSHIFT_NOT_IMPORTED try: - import pyathena + import pyathena # type: ignore[import-not-found] except ImportError: - pyathena = ATHENA_NOT_IMPORTED # type: ignore[assignment] + pyathena = ATHENA_NOT_IMPORTED try: from pyathena import sqlalchemy_athena except (ImportError, AttributeError): - sqlalchemy_athena = ATHENA_NOT_IMPORTED # type: ignore[assignment] + sqlalchemy_athena = ATHENA_NOT_IMPORTED try: - from pyathena.sqlalchemy_athena import types as athenatypes + from pyathena.sqlalchemy_athena import types as athenatypes # type: ignore[import-not-found] except (ImportError, AttributeError): athenatypes = ATHENA_NOT_IMPORTED diff --git a/great_expectations/compatibility/databricks.py b/great_expectations/compatibility/databricks.py index d935de38e3fc..e78e5701d500 100644 --- a/great_expectations/compatibility/databricks.py +++ 
b/great_expectations/compatibility/databricks.py @@ -5,6 +5,6 @@ ) try: - from databricks import connect # type: ignore[import-not-found] + from databricks import connect # type: ignore[import-untyped] except ImportError: connect = DATABRICKS_CONNECT_NOT_IMPORTED diff --git a/great_expectations/compatibility/google.py b/great_expectations/compatibility/google.py index 53cda7b43b2b..03eeccea689e 100644 --- a/great_expectations/compatibility/google.py +++ b/great_expectations/compatibility/google.py @@ -12,9 +12,9 @@ # DeprecationWarning: pkg_resources is deprecated as an API warnings.simplefilter(action="ignore", category=DeprecationWarning) try: - from google.cloud import secretmanager + from google.cloud import secretmanager # type: ignore[attr-defined] except (ImportError, AttributeError): - secretmanager = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED # type: ignore[assignment] + secretmanager = GOOGLE_CLOUD_STORAGE_NOT_IMPORTED try: from google.api_core.exceptions import GoogleAPIError diff --git a/great_expectations/compatibility/sqlalchemy.py b/great_expectations/compatibility/sqlalchemy.py index 8b44ef433ed5..edb518affac0 100644 --- a/great_expectations/compatibility/sqlalchemy.py +++ b/great_expectations/compatibility/sqlalchemy.py @@ -10,119 +10,119 @@ try: import sqlalchemy except ImportError: - sqlalchemy = SQLALCHEMY_NOT_IMPORTED + sqlalchemy = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy.sql.selectable import Subquery except (ImportError, AttributeError): - Subquery = SQLALCHEMY_NOT_IMPORTED + Subquery = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy import engine except ImportError: - engine = SQLALCHEMY_NOT_IMPORTED + engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy import dialects except ImportError: - dialects = SQLALCHEMY_NOT_IMPORTED + dialects = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy import inspect except ImportError: - inspect = 
SQLALCHEMY_NOT_IMPORTED + inspect = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy.dialects import sqlite except (ImportError, AttributeError): - sqlite = SQLALCHEMY_NOT_IMPORTED + sqlite = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy.dialects import registry except (ImportError, AttributeError): - registry = SQLALCHEMY_NOT_IMPORTED + registry = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy.engine import Dialect except (ImportError, AttributeError): - Dialect = SQLALCHEMY_NOT_IMPORTED + Dialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.engine import Inspector except (ImportError, AttributeError): - Inspector = SQLALCHEMY_NOT_IMPORTED + Inspector = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.engine import reflection except (ImportError, AttributeError): - reflection = SQLALCHEMY_NOT_IMPORTED + reflection = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy.engine import Connection except (ImportError, AttributeError): - Connection = SQLALCHEMY_NOT_IMPORTED + Connection = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.engine import Engine except (ImportError, AttributeError): - Engine = SQLALCHEMY_NOT_IMPORTED + Engine = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.engine import Row except (ImportError, AttributeError): - Row = SQLALCHEMY_NOT_IMPORTED + Row = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.engine.row import RowProxy except (ImportError, AttributeError): - RowProxy = SQLALCHEMY_NOT_IMPORTED + RowProxy = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: - from sqlalchemy.engine.row import LegacyRow + from sqlalchemy.engine.row import LegacyRow # type: ignore[attr-defined] except (ImportError, AttributeError): LegacyRow = SQLALCHEMY_NOT_IMPORTED try: from sqlalchemy.engine.default import 
DefaultDialect except (ImportError, AttributeError): - DefaultDialect = SQLALCHEMY_NOT_IMPORTED + DefaultDialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.engine import url from sqlalchemy.engine.url import URL except (ImportError, AttributeError): - url = SQLALCHEMY_NOT_IMPORTED - URL = SQLALCHEMY_NOT_IMPORTED + url = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] + URL = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.exc import DatabaseError except (ImportError, AttributeError): - DatabaseError = SQLALCHEMY_NOT_IMPORTED + DatabaseError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.exc import IntegrityError except (ImportError, AttributeError): - IntegrityError = SQLALCHEMY_NOT_IMPORTED + IntegrityError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.exc import NoSuchTableError except (ImportError, AttributeError): - NoSuchTableError = SQLALCHEMY_NOT_IMPORTED + NoSuchTableError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.exc import OperationalError except (ImportError, AttributeError): - OperationalError = SQLALCHEMY_NOT_IMPORTED + OperationalError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.exc import ProgrammingError except (ImportError, AttributeError): - ProgrammingError = SQLALCHEMY_NOT_IMPORTED + ProgrammingError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.exc import SQLAlchemyError except (ImportError, AttributeError): - SQLAlchemyError = SQLALCHEMY_NOT_IMPORTED + SQLAlchemyError = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.orm import declarative_base @@ -132,122 +132,122 @@ try: from sqlalchemy.sql import functions except (ImportError, AttributeError): - functions = SQLALCHEMY_NOT_IMPORTED + functions = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy.sql import 
Insert except (ImportError, AttributeError): - Insert = SQLALCHEMY_NOT_IMPORTED + Insert = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.elements import literal except (ImportError, AttributeError): - literal = SQLALCHEMY_NOT_IMPORTED + literal = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] try: from sqlalchemy.sql.elements import TextClause except (ImportError, AttributeError): - TextClause = SQLALCHEMY_NOT_IMPORTED + TextClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.elements import quoted_name except (ImportError, AttributeError): - quoted_name = SQLALCHEMY_NOT_IMPORTED + quoted_name = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.elements import _anonymous_label except (ImportError, AttributeError): - _anonymous_label = SQLALCHEMY_NOT_IMPORTED + _anonymous_label = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.elements import ColumnElement except (ImportError, AttributeError): - ColumnElement = SQLALCHEMY_NOT_IMPORTED + ColumnElement = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import Cast except (ImportError, AttributeError): - Cast = SQLALCHEMY_NOT_IMPORTED + Cast = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import ColumnOperators except (ImportError, AttributeError): - ColumnOperators = SQLALCHEMY_NOT_IMPORTED + ColumnOperators = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import CTE except (ImportError, AttributeError): - CTE = SQLALCHEMY_NOT_IMPORTED + CTE = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import BinaryExpression except (ImportError, AttributeError): - BinaryExpression = SQLALCHEMY_NOT_IMPORTED + BinaryExpression = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from 
sqlalchemy.sql.expression import BooleanClauseList except (ImportError, AttributeError): - BooleanClauseList = SQLALCHEMY_NOT_IMPORTED + BooleanClauseList = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import ColumnClause except (ImportError, AttributeError): - ColumnClause = SQLALCHEMY_NOT_IMPORTED + ColumnClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import Label except (ImportError, AttributeError): - Label = SQLALCHEMY_NOT_IMPORTED + Label = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import Select except (ImportError, AttributeError): - Select = SQLALCHEMY_NOT_IMPORTED + Select = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql import Selectable except (ImportError, AttributeError): - Selectable = SQLALCHEMY_NOT_IMPORTED + Selectable = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import TableClause except (ImportError, AttributeError): - TableClause = SQLALCHEMY_NOT_IMPORTED + TableClause = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import TextualSelect except (ImportError, AttributeError): - TextualSelect = SQLALCHEMY_NOT_IMPORTED + TextualSelect = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.expression import WithinGroup except (ImportError, AttributeError): - WithinGroup = SQLALCHEMY_NOT_IMPORTED + WithinGroup = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.sql.operators import custom_op except (ImportError, AttributeError): - custom_op = SQLALCHEMY_NOT_IMPORTED + custom_op = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: - from sqlalchemy.engine.cursor import LegacyCursorResult + from sqlalchemy.engine.cursor import LegacyCursorResult # type: ignore[attr-defined] except (ImportError, AttributeError): 
LegacyCursorResult = SQLALCHEMY_NOT_IMPORTED try: from sqlalchemy.engine.cursor import CursorResult except (ImportError, AttributeError): - CursorResult = SQLALCHEMY_NOT_IMPORTED + CursorResult = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy.pool import StaticPool except (ImportError, AttributeError): - StaticPool = SQLALCHEMY_NOT_IMPORTED + StaticPool = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: from sqlalchemy import Table except (ImportError, AttributeError): - Table = SQLALCHEMY_NOT_IMPORTED + Table = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc,assignment] try: __version__: str | None = sqlalchemy.__version__ diff --git a/great_expectations/core/util.py b/great_expectations/core/util.py index 6ff315a739b0..123500685f7e 100644 --- a/great_expectations/core/util.py +++ b/great_expectations/core/util.py @@ -33,8 +33,8 @@ if not LegacyRow: LegacyRow = SQLALCHEMY_NOT_IMPORTED -if not Row: - Row = SQLALCHEMY_NOT_IMPORTED +if not Row: # type: ignore[truthy-function] + Row = SQLALCHEMY_NOT_IMPORTED # type: ignore[misc] SCHEMAS = { "api_np": { diff --git a/great_expectations/data_context/data_context/abstract_data_context.py b/great_expectations/data_context/data_context/abstract_data_context.py index 1e714a0055a4..da307c5fce6a 100644 --- a/great_expectations/data_context/data_context/abstract_data_context.py +++ b/great_expectations/data_context/data_context/abstract_data_context.py @@ -89,10 +89,10 @@ from great_expectations.validator.validator import Validator SQLAlchemyError = sqlalchemy.SQLAlchemyError -if not SQLAlchemyError: +if not SQLAlchemyError: # type: ignore[truthy-function] # We'll redefine this error in code below to catch ProfilerError, which is caught above, so SA errors will # noqa: E501 # just fall through - SQLAlchemyError = gx_exceptions.ProfilerError + SQLAlchemyError = gx_exceptions.ProfilerError # type: ignore[misc] if TYPE_CHECKING: diff --git 
a/great_expectations/data_context/store/database_store_backend.py b/great_expectations/data_context/store/database_store_backend.py index e5710b52f426..b75071ce6a33 100644 --- a/great_expectations/data_context/store/database_store_backend.py +++ b/great_expectations/data_context/store/database_store_backend.py @@ -266,9 +266,9 @@ def _set(self, key, value, allow_update=True, **kwargs) -> None: .values(**cols) ) else: - ins = self._table.insert().values(**cols) + ins = self._table.insert().values(**cols) # type: ignore[assignment] else: - ins = self._table.insert().values(**cols) + ins = self._table.insert().values(**cols) # type: ignore[assignment] try: with self.engine.begin() as connection: diff --git a/great_expectations/data_context/store/query_store.py b/great_expectations/data_context/store/query_store.py index 1b431e86a9f6..ff4ca68eefce 100644 --- a/great_expectations/data_context/store/query_store.py +++ b/great_expectations/data_context/store/query_store.py @@ -17,7 +17,7 @@ if is_version_greater_or_equal(sa.__version__, "1.4.0"): url_create_fn = sqlalchemy.URL.create else: - url_create_fn = sqlalchemy.URL + url_create_fn = sqlalchemy.URL # type: ignore[assignment] logger = logging.getLogger(__name__) diff --git a/great_expectations/data_context/util.py b/great_expectations/data_context/util.py index 9941d4ce2006..1c02d27153da 100644 --- a/great_expectations/data_context/util.py +++ b/great_expectations/data_context/util.py @@ -19,7 +19,7 @@ try: import sqlalchemy as sa # noqa: TID251 except ImportError: - sa = None + sa = None # type: ignore[assignment] logger = logging.getLogger(__name__) diff --git a/great_expectations/datasource/fluent/config.py b/great_expectations/datasource/fluent/config.py index 9fddc7c2f058..0a0135046e11 100644 --- a/great_expectations/datasource/fluent/config.py +++ b/great_expectations/datasource/fluent/config.py @@ -76,7 +76,7 @@ _MISSING: Final = object() JSON_ENCODERS: dict[Type, Callable] = {} -if TextClause: +if TextClause: 
# type: ignore[truthy-function] JSON_ENCODERS[TextClause] = lambda v: str(v) T = TypeVar("T") diff --git a/great_expectations/datasource/fluent/databricks_sql_datasource.py b/great_expectations/datasource/fluent/databricks_sql_datasource.py index a7486ac98e3b..88e300aa2eba 100644 --- a/great_expectations/datasource/fluent/databricks_sql_datasource.py +++ b/great_expectations/datasource/fluent/databricks_sql_datasource.py @@ -143,7 +143,7 @@ def _resolve_quoted_name(cls, table_name: str) -> str | quoted_name: from great_expectations.compatibility import sqlalchemy - if sqlalchemy.quoted_name: + if sqlalchemy.quoted_name: # type: ignore[truthy-function] if isinstance(table_name, sqlalchemy.quoted_name): return table_name diff --git a/great_expectations/datasource/fluent/pandas_datasource.py b/great_expectations/datasource/fluent/pandas_datasource.py index a7f65b2584d2..487fd94c7bba 100644 --- a/great_expectations/datasource/fluent/pandas_datasource.py +++ b/great_expectations/datasource/fluent/pandas_datasource.py @@ -1427,7 +1427,7 @@ def read_spss( def add_sql_asset( self, name: str, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, **kwargs, ) -> SQLAsset: # type: ignore[valid-type] @@ -1454,7 +1454,7 @@ def add_sql_asset( @public_api def read_sql( self, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, asset_name: Optional[str] = None, **kwargs, @@ -1484,7 +1484,7 @@ def read_sql( def add_sql_query_asset( self, name: str, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, **kwargs, ) -> SQLQueryAsset: # type: ignore[valid-type] @@ -1511,7 +1511,7 @@ def add_sql_query_asset( @public_api def read_sql_query( self, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # 
type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, asset_name: Optional[str] = None, **kwargs, diff --git a/great_expectations/datasource/fluent/pandas_datasource.pyi b/great_expectations/datasource/fluent/pandas_datasource.pyi index 1bcd34c39c1e..a84457d1b929 100644 --- a/great_expectations/datasource/fluent/pandas_datasource.pyi +++ b/great_expectations/datasource/fluent/pandas_datasource.pyi @@ -434,7 +434,7 @@ class PandasDatasource(_PandasDatasource): def add_sql_asset( # noqa: PLR0913 self, name: str, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, *, batch_metadata: Optional[BatchMetadata] = ..., @@ -448,7 +448,7 @@ class PandasDatasource(_PandasDatasource): def add_sql_query_asset( # noqa: PLR0913 self, name: str, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, *, batch_metadata: Optional[BatchMetadata] = ..., @@ -814,7 +814,7 @@ class PandasDatasource(_PandasDatasource): ) -> Batch: ... def read_sql( # noqa: PLR0913 self, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, *, asset_name: Optional[str] = ..., @@ -828,7 +828,7 @@ class PandasDatasource(_PandasDatasource): ) -> Batch: ... 
def read_sql_query( # noqa: PLR0913 self, - sql: sa.select | sa.text | str, + sql: sa.select | sa.text | str, # type: ignore[valid-type] con: sqlalchemy.Engine | sqlite3.Connection | str, *, asset_name: Optional[str] = ..., diff --git a/great_expectations/datasource/fluent/snowflake_datasource.py b/great_expectations/datasource/fluent/snowflake_datasource.py index b413a0b07f3d..8bd3fddc9e12 100644 --- a/great_expectations/datasource/fluent/snowflake_datasource.py +++ b/great_expectations/datasource/fluent/snowflake_datasource.py @@ -809,4 +809,4 @@ def _build_engine_with_connect_args( engine_kwargs["url"] = url - return sa.create_engine(**engine_kwargs) + return sa.create_engine(**engine_kwargs) # type: ignore[misc] diff --git a/great_expectations/datasource/fluent/sql_datasource.py b/great_expectations/datasource/fluent/sql_datasource.py index 95f7bc56b472..537ca5e301e4 100644 --- a/great_expectations/datasource/fluent/sql_datasource.py +++ b/great_expectations/datasource/fluent/sql_datasource.py @@ -857,7 +857,7 @@ def _resolve_quoted_name(cls, table_name: str) -> str | quoted_name: from great_expectations.compatibility import sqlalchemy - if sqlalchemy.quoted_name: + if sqlalchemy.quoted_name: # type: ignore[truthy-function] if isinstance(table_name, sqlalchemy.quoted_name): return table_name @@ -909,7 +909,7 @@ def as_selectable(self) -> sqlalchemy.Selectable: This can be used in a from clause for a query against this data. 
""" - return sa.text(self.qualified_name) + return sa.text(self.qualified_name) # type: ignore[return-value] @override def _create_batch_spec_kwargs(self) -> dict[str, Any]: diff --git a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py index b603961c3ae9..572f4e2f3921 100644 --- a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py +++ b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_partitioner.py @@ -167,7 +167,7 @@ def partition_on_date_parts( column_batch_identifiers, date_parts ) - query: Union[sqlalchemy.BinaryExpression, sqlalchemy.BooleanClauseList] = sa.and_( + query: Union[sqlalchemy.BinaryExpression, sqlalchemy.BooleanClauseList] = sa.and_( # type: ignore[assignment] *[ sa.extract(date_part.value, sa.column(column_name)) == date_parts_dict[date_part.value] @@ -284,7 +284,7 @@ def partition_on_multi_column_values( ) -> bool: """Partition on the joint values in the named columns""" - return sa.and_( + return sa.and_( # type: ignore[return-value] *( sa.column(column_name) == column_value for column_name, column_value in batch_identifiers.items() @@ -477,7 +477,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par concat_date_parts: sqlalchemy.Cast | sqlalchemy.ColumnOperators if len(date_parts) == 1: # MSSql does not accept single item concatenation - concat_clause = sa.func.distinct( + concat_clause = sa.func.distinct( # type: ignore[assignment] sa.func.extract(date_parts[0].value, sa.column(column_name)).label( date_parts[0].value ) @@ -503,7 +503,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par ) ) - concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") + concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") # type: ignore[assignment] else: 
concat_date_parts = sa.func.concat( "", @@ -522,9 +522,9 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_date_par ), ) - concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") + concat_clause = sa.func.distinct(concat_date_parts).label("concat_distinct_values") # type: ignore[assignment] - partitioned_query: sqlalchemy.Selectable = sa.select( + partitioned_query: sqlalchemy.Selectable = sa.select( # type: ignore[call-overload] concat_clause, *[ sa.cast(sa.func.extract(date_part.value, sa.column(column_name)), sa.Integer).label( @@ -722,7 +722,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_column_v """Partition using the values in the named column""" return ( sa.select(sa.func.distinct(sa.column(column_name))) - .select_from(selectable) + .select_from(selectable) # type: ignore[arg-type] .order_by(sa.column(column_name).asc()) ) @@ -741,7 +741,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_converte sa.column(column_name), ) ) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] raise NotImplementedError( f'Partitioner method "partition_on_converted_datetime" is not supported for "{self._dialect}" SQL dialect.' 
# noqa: E501 @@ -762,7 +762,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] if self._dialect == GXSqlDialect.MYSQL: return sa.select( @@ -775,7 +775,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] if self._dialect == GXSqlDialect.MSSQL: return sa.select( @@ -789,7 +789,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] if self._dialect == GXSqlDialect.AWSATHENA: return sa.select( @@ -799,7 +799,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] return sa.select( sa.func.distinct( @@ -808,7 +808,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_divided_ sa.Integer, ) ) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] def get_partition_query_for_data_for_batch_identifiers_for_partition_on_mod_integer( self, @@ -823,11 +823,11 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_mod_inte ]: return sa.select( sa.func.distinct(sa.cast(sa.column(column_name), sa.Integer) % mod) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] return sa.select( sa.func.distinct(sa.func.mod(sa.cast(sa.column(column_name), sa.Integer), mod)) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] @staticmethod def get_partition_query_for_data_for_batch_identifiers_for_partition_on_multi_column_values( @@ -838,7 +838,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_multi_co return ( 
sa.select(*[sa.column(column_name) for column_name in column_names]) .distinct() - .select_from(selectable) + .select_from(selectable) # type: ignore[arg-type] ) def get_partition_query_for_data_for_batch_identifiers_for_partition_on_hashed_column( @@ -853,7 +853,7 @@ def get_partition_query_for_data_for_batch_identifiers_for_partition_on_hashed_c sa.func.distinct( sa.func.md5(sa.cast(sa.column(column_name), sa.VARCHAR), hash_digits) ) - ).select_from(selectable) + ).select_from(selectable) # type: ignore[arg-type] raise NotImplementedError( f'Partitioner method "partition_on_hashed_column" is not supported for "{self._dialect}" SQL dialect.' # noqa: E501 diff --git a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py index b1c2d03828f0..30782daba833 100644 --- a/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py +++ b/great_expectations/execution_engine/partition_and_sample/sqlalchemy_data_sampler.py @@ -46,9 +46,9 @@ def sample_using_limit( # Partition clause should be permissive of all values if not supplied. 
if where_clause is None: if execution_engine.dialect_name == GXSqlDialect.SQLITE: - where_clause = sa.text("1 = 1") + where_clause = sa.text("1 = 1") # type: ignore[assignment] else: - where_clause = sa.true() + where_clause = sa.true() # type: ignore[assignment] table_name: str = batch_spec["table_name"] @@ -61,7 +61,7 @@ def sample_using_limit( raw_query: sqlalchemy.Selectable = ( sa.select("*") .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) + .where(where_clause) # type: ignore[arg-type] ) query: str = str( raw_query.compile( @@ -77,7 +77,7 @@ def sample_using_limit( selectable_query: sqlalchemy.Selectable = ( sa.select("*") .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) + .where(where_clause) # type: ignore[arg-type] .limit(batch_spec["sampling_kwargs"]["n"]) ) string_of_query: str = str( @@ -93,9 +93,9 @@ def sample_using_limit( return string_of_query else: return ( - sa.select("*") + sa.select("*") # type: ignore[return-value] .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) + .where(where_clause) # type: ignore[arg-type] .limit(batch_spec["sampling_kwargs"]["n"]) ) @@ -154,16 +154,16 @@ def sample_using_random( "the 'sampling_kwargs' configuration." 
) from e - num_rows: int = execution_engine.execute_query( + num_rows: int = execution_engine.execute_query( # type: ignore[assignment] sa.select(sa.func.count()) .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) + .where(where_clause) # type: ignore[arg-type] ).scalar() sample_size: int = round(p * num_rows) return ( sa.select("*") .select_from(sa.table(table_name, schema=batch_spec.get("schema_name", None))) - .where(where_clause) + .where(where_clause) # type: ignore[arg-type] .order_by(sa.func.random()) .limit(sample_size) ) @@ -191,7 +191,7 @@ def sample_using_mod( mod: int = self.get_sampling_kwargs_value_or_default(batch_spec, "mod") value: int = self.get_sampling_kwargs_value_or_default(batch_spec, "value") - return sa.column(column_name) % mod == value + return sa.column(column_name) % mod == value # type: ignore[return-value] def sample_using_a_list( self, @@ -213,7 +213,7 @@ def sample_using_a_list( self.verify_batch_spec_sampling_kwargs_key_exists("value_list", batch_spec) column_name: str = self.get_sampling_kwargs_value_or_default(batch_spec, "column_name") value_list: list = self.get_sampling_kwargs_value_or_default(batch_spec, "value_list") - return sa.column(column_name).in_(value_list) + return sa.column(column_name).in_(value_list) # type: ignore[return-value] def sample_using_md5( self, @@ -242,6 +242,6 @@ def sample_using_md5( ) return ( - sa.func.right(sa.func.md5(sa.cast(sa.column(column_name), sa.Text)), hash_digits) + sa.func.right(sa.func.md5(sa.cast(sa.column(column_name), sa.Text)), hash_digits) # type: ignore[return-value] == hash_value ) diff --git a/great_expectations/execution_engine/sqlalchemy_execution_engine.py b/great_expectations/execution_engine/sqlalchemy_execution_engine.py index 44a3dbf92a94..b687cd098483 100644 --- a/great_expectations/execution_engine/sqlalchemy_execution_engine.py +++ b/great_expectations/execution_engine/sqlalchemy_execution_engine.py @@ -105,13 +105,13 
@@ import psycopg2 # noqa: F401 import sqlalchemy.dialects.postgresql.psycopg2 as sqlalchemy_psycopg2 # noqa: TID251 except (ImportError, KeyError): - sqlalchemy_psycopg2 = None + sqlalchemy_psycopg2 = None # type: ignore[assignment] try: import sqlalchemy_dremio.pyodbc if sa: - sa.dialects.registry.register(GXSqlDialect.DREMIO, "sqlalchemy_dremio.pyodbc", "dialect") + sa.dialects.registry.register(GXSqlDialect.DREMIO, "sqlalchemy_dremio.pyodbc", "dialect") # type: ignore[arg-type] except ImportError: sqlalchemy_dremio = None @@ -119,7 +119,7 @@ if sa: # Sometimes "snowflake-sqlalchemy" fails to self-register in certain environments, so we do it explicitly. # noqa: E501 # (see https://stackoverflow.com/questions/53284762/nosuchmoduleerror-cant-load-plugin-sqlalchemy-dialectssnowflake) - sa.dialects.registry.register(GXSqlDialect.SNOWFLAKE, "snowflake.sqlalchemy", "dialect") + sa.dialects.registry.register(GXSqlDialect.SNOWFLAKE, "snowflake.sqlalchemy", "dialect") # type: ignore[arg-type] from great_expectations.compatibility.bigquery import ( _BIGQUERY_MODULE_NAME, @@ -130,7 +130,7 @@ ) if sqla_bigquery and sa: - sa.dialects.registry.register(GXSqlDialect.BIGQUERY, _BIGQUERY_MODULE_NAME, "BigQueryDialect") + sa.dialects.registry.register(GXSqlDialect.BIGQUERY, _BIGQUERY_MODULE_NAME, "BigQueryDialect") # type: ignore[arg-type] try: import teradatasqlalchemy.dialect @@ -421,7 +421,7 @@ def _on_connect(dbapi_con, connection_record): "name": name, "credentials": credentials, "data_context": data_context, - "engine": engine, + "engine": engine, # type: ignore[dict-item] "connection_string": connection_string, "url": url, "batch_data_dict": batch_data_dict, @@ -644,7 +644,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 as a subquery wrapped in "(subquery) alias". 
TextClause must first be converted to TextualSelect using sa.columns() before it can be converted to type Subquery """ - if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): + if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] selectable = selectable.columns().subquery() # Filtering by row condition. @@ -652,7 +652,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 condition_parser = domain_kwargs["condition_parser"] if condition_parser == "great_expectations__experimental__": parsed_condition = parse_condition_to_sqlalchemy(domain_kwargs["row_condition"]) - selectable = sa.select(sa.text("*")).select_from(selectable).where(parsed_condition) + selectable = sa.select(sa.text("*")).select_from(selectable).where(parsed_condition) # type: ignore[arg-type] else: raise GreatExpectationsError( # noqa: TRY003 "SqlAlchemyExecutionEngine only supports the great_expectations condition_parser." # noqa: E501 @@ -670,11 +670,11 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 # SQLAlchemy 2.0 deprecated select_from() from a non-Table asset without a subquery. # Implicit coercion of SELECT and textual SELECT constructs into FROM clauses is deprecated. 
# noqa: E501 if not isinstance(selectable, (sa.Table, Subquery)): - selectable = selectable.subquery() + selectable = selectable.subquery() # type: ignore[attr-defined] selectable = ( sa.select(sa.text("*")) - .select_from(selectable) + .select_from(selectable) # type: ignore[arg-type] .where(parse_condition_to_sqlalchemy(filter_condition.condition)) ) elif len(filter_conditions) > 1: @@ -707,7 +707,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 if ignore_row_if == "both_values_are_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] .where( sa.not_( sa.and_( @@ -720,7 +720,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 elif ignore_row_if == "either_value_is_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] .where( sa.not_( sa.or_( @@ -750,7 +750,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 if ignore_row_if == "all_values_are_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] .where( sa.not_( sa.and_( @@ -765,7 +765,7 @@ def get_domain_records( # noqa: C901, PLR0912, PLR0915 elif ignore_row_if == "any_value_is_missing": selectable = get_sqlalchemy_selectable( sa.select(sa.text("*")) - .select_from(get_sqlalchemy_selectable(selectable)) + .select_from(get_sqlalchemy_selectable(selectable)) # type: ignore[arg-type] .where( sa.not_( sa.or_( @@ -1014,19 +1014,19 @@ def resolve_metric_bundle( # noqa: C901 - too complex as a subquery wrapped in "(subquery) alias". 
TextClause must first be converted to TextualSelect using sa.columns() before it can be converted to type Subquery """ - if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): + if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] sa_query_object = sa.select(*query["select"]).select_from( selectable.columns().subquery() ) - elif (sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select)) or ( - sqlalchemy.TextualSelect and isinstance(selectable, sqlalchemy.TextualSelect) + elif (sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select)) or ( # type: ignore[truthy-function] + sqlalchemy.TextualSelect and isinstance(selectable, sqlalchemy.TextualSelect) # type: ignore[truthy-function] ): sa_query_object = sa.select(*query["select"]).select_from(selectable.subquery()) else: - sa_query_object = sa.select(*query["select"]).select_from(selectable) + sa_query_object = sa.select(*query["select"]).select_from(selectable) # type: ignore[arg-type] logger.debug(f"Attempting query {sa_query_object!s}") - res = self.execute_query(sa_query_object).fetchall() + res = self.execute_query(sa_query_object).fetchall() # type: ignore[assignment] logger.debug( f"""SqlAlchemyExecutionEngine computed {len(res[0])} metrics on domain_id \ @@ -1104,14 +1104,14 @@ def execute_partitioned_query( # Note: Athena does not support casting to string, only to varchar # but sqlalchemy currently generates a query as `CAST(colname AS STRING)` instead # of `CAST(colname AS VARCHAR)` with other dialects. 
- partitioned_query = str( + partitioned_query = str( # type: ignore[assignment] partitioned_query.compile(self.engine, compile_kwargs={"literal_binds": True}) ) pattern = re.compile(r"(CAST\(EXTRACT\(.*?\))( AS STRING\))", re.IGNORECASE) - partitioned_query = re.sub(pattern, r"\1 AS VARCHAR)", partitioned_query) + partitioned_query = re.sub(pattern, r"\1 AS VARCHAR)", partitioned_query) # type: ignore[call-overload] - return self.execute_query(partitioned_query).fetchall() + return self.execute_query(partitioned_query).fetchall() # type: ignore[return-value] def get_data_for_batch_identifiers( self, @@ -1179,7 +1179,7 @@ def _build_selectable_from_batch_spec(self, batch_spec: BatchSpec) -> sqlalchemy sampler_fn = self._data_sampler.get_sampler_method(sampling_method) return ( sa.select("*") - .select_from(selectable) + .select_from(selectable) # type: ignore[arg-type] .where( sa.and_( partition_clause, @@ -1188,7 +1188,7 @@ def _build_selectable_from_batch_spec(self, batch_spec: BatchSpec) -> sqlalchemy ) ) - return sa.select("*").select_from(selectable).where(partition_clause) + return sa.select("*").select_from(selectable).where(partition_clause) # type: ignore[arg-type] def _subselectable(self, batch_spec: BatchSpec) -> sqlalchemy.Selectable: table_name = batch_spec.get("table_name") @@ -1272,14 +1272,14 @@ def get_inspector(self) -> sqlalchemy.engine.reflection.Inspector: if self._inspector is None: if version.parse(sa.__version__) < version.parse("1.4"): # Inspector.from_engine deprecated since 1.4, sa.inspect() should be used instead - self._inspector = sqlalchemy.reflection.Inspector.from_engine(self.engine) + self._inspector = sqlalchemy.reflection.Inspector.from_engine(self.engine) # type: ignore[assignment] else: - self._inspector = sa.inspect(self.engine) + self._inspector = sa.inspect(self.engine) # type: ignore[assignment] - return self._inspector + return self._inspector # type: ignore[return-value] - @contextmanager - def get_connection(self) -> 
sqlalchemy.Connection: + @contextmanager # type: ignore[arg-type] + def get_connection(self) -> sqlalchemy.Connection: # type: ignore[misc] """Get a connection for executing queries. Some databases sqlite/mssql temp tables only persist within a connection, @@ -1294,7 +1294,7 @@ def get_connection(self) -> sqlalchemy.Connection: if self.dialect_name in _PERSISTED_CONNECTION_DIALECTS: try: if not self._connection: - self._connection = self.engine.connect() + self._connection = self.engine.connect() # type: ignore[assignment] yield self._connection finally: # Temp tables only persist within a connection for some dialects, @@ -1316,7 +1316,7 @@ def execute_query( Returns: CursorResult for sqlalchemy 2.0+ or LegacyCursorResult for earlier versions. """ - with self.get_connection() as connection: + with self.get_connection() as connection: # type: ignore[var-annotated] result = connection.execute(query) return result @@ -1335,7 +1335,7 @@ def execute_query_in_transaction( Returns: CursorResult for sqlalchemy 2.0+ or LegacyCursorResult for earlier versions. 
""" # noqa: E501 - with self.get_connection() as connection: + with self.get_connection() as connection: # type: ignore[var-annotated] if ( is_version_greater_or_equal(sqlalchemy.sqlalchemy.__version__, "2.0.0") and not connection.closed diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py index a3934941c078..bafaaac49823 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_distinct_values.py @@ -62,12 +62,12 @@ def _sqlalchemy( distinct_values: List[sqlalchemy.Row] if hasattr(column, "is_not"): - distinct_values = execution_engine.execute_query( - sa.select(column).where(column.is_not(None)).distinct().select_from(selectable) + distinct_values = execution_engine.execute_query( # type: ignore[assignment] + sa.select(column).where(column.is_not(None)).distinct().select_from(selectable) # type: ignore[arg-type] ).fetchall() else: - distinct_values = execution_engine.execute_query( - sa.select(column).where(column.isnot(None)).distinct().select_from(selectable) + distinct_values = execution_engine.execute_query( # type: ignore[assignment] + sa.select(column).where(column.isnot(None)).distinct().select_from(selectable) # type: ignore[arg-type] ).fetchall() # Vectorized operation is not faster here due to overhead of converting to and from numpy array # noqa: E501 return {row[0] for row in distinct_values} diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py index 295ef7d504f4..8f874b02d5e8 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py +++ 
b/great_expectations/expectations/metrics/column_aggregate_metrics/column_histogram.py @@ -119,12 +119,12 @@ def _sqlalchemy( .where( sa.column(column) != None, # noqa: E711 ) - .select_from(selectable) + .select_from(selectable) # type: ignore[arg-type] ) # Run the data through convert_to_json_serializable to ensure we do not have Decimal types # noqa: E501 return convert_to_json_serializable( - list(execution_engine.execute_query(query).fetchone()) + list(execution_engine.execute_query(query).fetchone()) # type: ignore[arg-type] ) idx = 0 @@ -201,11 +201,11 @@ def _sqlalchemy( .where( sa.column(column) != None, # noqa: E711 ) - .select_from(selectable) + .select_from(selectable) # type: ignore[arg-type] ) # Run the data through convert_to_json_serializable to ensure we do not have Decimal types - return convert_to_json_serializable(list(execution_engine.execute_query(query).fetchone())) + return convert_to_json_serializable(list(execution_engine.execute_query(query).fetchone())) # type: ignore[arg-type] @metric_value(engine=SparkDFExecutionEngine) def _spark( # noqa: C901 diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py index b6a80c283c57..0ffd6da68e08 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_median.py @@ -55,7 +55,7 @@ def _sqlalchemy( accessor_domain_kwargs, ) = execution_engine.get_compute_domain(metric_domain_kwargs, MetricDomainTypes.COLUMN) column_name = accessor_domain_kwargs["column"] - column = sa.column(column_name) + column = sa.column(column_name) # type: ignore[var-annotated] """SqlAlchemy Median Implementation""" nonnull_count = metrics.get("column_values.nonnull.count") if not nonnull_count: @@ -67,7 +67,7 @@ def _sqlalchemy( .where(column != None) # noqa: E711 .offset(max(nonnull_count // 2 
- 1, 0)) .limit(2) - .select_from(selectable) + .select_from(selectable) # type: ignore[arg-type] ) column_values = list(element_values.fetchall()) diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py index 82e30d41cf3c..f7c6500d7ac9 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_quantile_values.py @@ -68,7 +68,7 @@ def _sqlalchemy( # noqa: C901, PLR0911 metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN ) column_name = accessor_domain_kwargs["column"] - column = sa.column(column_name) + column = sa.column(column_name) # type: ignore[var-annotated] dialect_name = execution_engine.dialect_name quantiles = metric_value_kwargs["quantiles"] allow_relative_error = metric_value_kwargs.get("allow_relative_error", False) @@ -96,7 +96,7 @@ def _sqlalchemy( # noqa: C901, PLR0911 ) elif dialect_name.lower() == GXSqlDialect.CLICKHOUSE: return _get_column_quantiles_clickhouse( - column=column, + column=column, # type: ignore[arg-type] quantiles=quantiles, selectable=selectable, execution_engine=execution_engine, @@ -180,7 +180,7 @@ def _spark( "SparkDFExecutionEngine requires relative error to be False or to be a float between 0 and 1." 
# noqa: E501 ) - return df.approxQuantile(column, list(quantiles), allow_relative_error) + return df.approxQuantile(column, list(quantiles), allow_relative_error) # type: ignore[attr-defined] def _get_column_quantiles_mssql( @@ -188,14 +188,14 @@ def _get_column_quantiles_mssql( ) -> list: # mssql requires over(), so we add an empty over() clause selects: list[sqlalchemy.WithinGroup] = [ - sa.func.percentile_disc(quantile).within_group(column.asc()).over() + sa.func.percentile_disc(quantile).within_group(column.asc()).over() # type: ignore[misc] for quantile in quantiles ] quantiles_query: sqlalchemy.Select = sa.select(*selects).select_from(selectable) try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) + return list(quantiles_results) # type: ignore[arg-type] except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() @@ -209,13 +209,14 @@ def _get_column_quantiles_bigquery( ) -> list: # BigQuery does not support "WITHIN", so we need a special case for it selects: list[sqlalchemy.WithinGroup] = [ - sa.func.percentile_disc(column, quantile).over() for quantile in quantiles + sa.func.percentile_disc(column, quantile).over() # type: ignore[misc] + for quantile in quantiles ] quantiles_query: sqlalchemy.Select = sa.select(*selects).select_from(selectable) try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) + return list(quantiles_results) # type: ignore[arg-type] except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." 
exception_traceback: str = traceback.format_exc() @@ -261,14 +262,14 @@ def _get_column_quantiles_mysql( ) .label(f"q_{idx}") ) - selects.append(quantile_column) + selects.append(quantile_column) # type: ignore[arg-type] quantiles_query: sqlalchemy.Select = ( sa.select(*selects).distinct().order_by(percent_rank_query.columns.p.desc()) ) try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) + return list(quantiles_results) # type: ignore[arg-type] except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() @@ -287,7 +288,7 @@ def _get_column_quantiles_trino( try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results)[0] + return list(quantiles_results)[0] # type: ignore[arg-type] except (sqlalchemy.ProgrammingError, trino.trinoexceptions.TrinoUserError) as pe: exception_message: str = "An SQL syntax Exception occurred." 
exception_traceback: str = traceback.format_exc() @@ -302,7 +303,7 @@ def _get_column_quantiles_clickhouse( quantiles_list = list(quantiles) sql_approx: str = f"quantilesExact({', '.join([str(x) for x in quantiles_list])})({column})" selects_approx: list[sqlalchemy.TextClause] = [sa.text(sql_approx)] - quantiles_query: sqlalchemy.Select = sa.select(selects_approx).select_from(selectable) + quantiles_query: sqlalchemy.Select = sa.select(selects_approx).select_from(selectable) # type: ignore[call-overload] try: quantiles_results = execution_engine.execute(quantiles_query).fetchone()[0] return quantiles_results @@ -341,7 +342,7 @@ def _get_column_quantiles_sqlite( ] return list( itertools.chain.from_iterable( - [list(quantile_result) for quantile_result in quantiles_results] + [list(quantile_result) for quantile_result in quantiles_results] # type: ignore[arg-type] ) ) except sqlalchemy.ProgrammingError as pe: @@ -364,7 +365,7 @@ def _get_column_quantiles_athena( try: quantiles_results = execution_engine.execute_query(quantiles_query_approx).fetchone() # the ast literal eval is needed because the method is returning a json string and not a dict # noqa: E501 - results = ast.literal_eval(quantiles_results[0]) + results = ast.literal_eval(quantiles_results[0]) # type: ignore[index] return results except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." @@ -392,7 +393,7 @@ def _get_column_quantiles_generic_sqlalchemy( try: quantiles_results = execution_engine.execute_query(quantiles_query).fetchone() - return list(quantiles_results) + return list(quantiles_results) # type: ignore[arg-type] except sqlalchemy.ProgrammingError: # ProgrammingError: (psycopg2.errors.SyntaxError) Aggregate function "percentile_disc" is not supported; # noqa: E501 # use approximate percentile_disc or percentile_cont instead. 
@@ -410,7 +411,7 @@ def _get_column_quantiles_generic_sqlalchemy( quantiles_results = execution_engine.execute_query( quantiles_query_approx ).fetchone() - return list(quantiles_results) + return list(quantiles_results) # type: ignore[arg-type] except sqlalchemy.ProgrammingError as pe: exception_message: str = "An SQL syntax Exception occurred." exception_traceback: str = traceback.format_exc() diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py index eb6bdbdd4db6..ca3d5ba85237 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_value_counts.py @@ -124,8 +124,8 @@ def _sqlalchemy( query = query.order_by(sa.column(column)) elif sort == "count": query = query.order_by(sa.column("count").desc()) - results: List[sqlalchemy.Row] = execution_engine.execute_query( - query.select_from(selectable) + results: List[sqlalchemy.Row] = execution_engine.execute_query( # type: ignore[assignment] + query.select_from(selectable) # type: ignore[arg-type] ).fetchall() # Numpy does not always infer the correct DataTypes for SqlAlchemy Row, so we cannot use vectorized approach. 
# noqa: E501 series = pd.Series( diff --git a/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py b/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py index f9b7ed3af58c..231e1b7ab1d2 100644 --- a/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py +++ b/great_expectations/expectations/metrics/column_aggregate_metrics/column_values_between_count.py @@ -154,7 +154,7 @@ def _sqlalchemy( # noqa: C901, PLR0912 ) = execution_engine.get_compute_domain( domain_kwargs=metric_domain_kwargs, domain_type=MetricDomainTypes.COLUMN ) - column = sa.column(accessor_domain_kwargs["column"]) + column = sa.column(accessor_domain_kwargs["column"]) # type: ignore[var-annotated] if min_value is None: if strict_max: @@ -179,7 +179,7 @@ def _sqlalchemy( # noqa: C901, PLR0912 condition = sa.and_(column >= min_value, column <= max_value) return execution_engine.execute_query( - sa.select(sa.func.count()).select_from(selectable).where(condition) + sa.select(sa.func.count()).select_from(selectable).where(condition) # type: ignore[arg-type] ).scalar() @metric_value(engine=SparkDFExecutionEngine) diff --git a/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py b/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py index f6c9cb2936e4..6f1065ed94ac 100644 --- a/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py +++ b/great_expectations/expectations/metrics/column_map_metrics/column_values_in_set.py @@ -18,7 +18,7 @@ try: import sqlalchemy as sa # noqa: TID251 except ImportError: - sa = None + sa = None # type: ignore[assignment] class ColumnValuesInSet(ColumnMapMetricProvider): diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py b/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py index 
0de2c5dc5abd..17046a1c337b 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_condition_partial.py @@ -179,7 +179,7 @@ def inner_func( # noqa: PLR0913 if dialect is None: # Trino if hasattr(sqlalchemy_engine, "dialect"): - dialect = sqlalchemy_engine.dialect + dialect = sqlalchemy_engine.dialect # type: ignore[assignment] expected_condition = metric_fn( cls, diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py index 17a863aab2f2..084f742a1d82 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_map_condition_auxilliary_methods.py @@ -263,12 +263,12 @@ def _sqlalchemy_column_map_condition_values( selectable = execution_engine.get_domain_records(domain_kwargs=compute_domain_kwargs) - query = sa.select(sa.column(column_name).label("unexpected_values")).where(unexpected_condition) + query = sa.select(sa.column(column_name).label("unexpected_values")).where(unexpected_condition) # type: ignore[var-annotated] if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): if hasattr(selectable, "subquery"): query = query.select_from(selectable.subquery()) else: - query = query.select_from(selectable) + query = query.select_from(selectable) # type: ignore[arg-type] result_format = metric_value_kwargs["result_format"] @@ -317,13 +317,13 @@ def _sqlalchemy_column_map_condition_value_counts( column_name: Union[str, sqlalchemy.quoted_name] = accessor_domain_kwargs["column"] - column: sa.Column = sa.column(column_name) + column: sa.Column = sa.column(column_name) # type: ignore[assignment] selectable = 
execution_engine.get_domain_records(domain_kwargs=compute_domain_kwargs) query = sa.select(column, sa.func.count(column)).where(unexpected_condition).group_by(column) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - query = query.select_from(selectable) + query = query.select_from(selectable) # type: ignore[arg-type] return execution_engine.execute_query(query).fetchall() diff --git a/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py index 58e54d4460b7..64a95cb32544 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/column_pair_map_condition_auxilliary_methods.py @@ -162,13 +162,13 @@ def _sqlalchemy_column_pair_map_condition_values( # noinspection PyPep8Naming column_B_name = accessor_domain_kwargs["column_B"] - query = sa.select( + query = sa.select( # type: ignore[var-annotated] sa.column(column_A_name).label("unexpected_values_A"), sa.column(column_B_name).label("unexpected_values_B"), ).where(boolean_mapped_unexpected_values) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) - query = query.select_from(selectable) + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] + query = query.select_from(selectable) # type: ignore[arg-type] result_format = metric_value_kwargs["result_format"] if result_format["result_format"] != "COMPLETE": @@ -205,7 +205,7 @@ def _sqlalchemy_column_pair_map_condition_filtered_row_count( selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) return execution_engine.execute_query( - sa.select(sa.func.count()).select_from(selectable) + sa.select(sa.func.count()).select_from(selectable) # type: 
ignore[arg-type] ).scalar() diff --git a/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py index 1a2fe2c57a38..df02a498fc44 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/map_condition_auxilliary_methods.py @@ -293,7 +293,7 @@ def _sqlalchemy_map_condition_unexpected_count_value( selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) # The integral values are cast to SQL Numeric in order to avoid a bug in AWS Redshift (converted to integer later). # noqa: E501 - count_case_statement: List[sqlalchemy.Label] = sa.case( + count_case_statement: List[sqlalchemy.Label] = sa.case( # type: ignore[assignment] ( unexpected_condition, sa.sql.expression.cast(1, sa.Numeric), @@ -301,14 +301,14 @@ def _sqlalchemy_map_condition_unexpected_count_value( else_=sa.sql.expression.cast(0, sa.Numeric), ).label("condition") - count_selectable: sqlalchemy.Select = sa.select(count_case_statement) + count_selectable: sqlalchemy.Select = sa.select(count_case_statement) # type: ignore[call-overload] if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) - count_selectable = count_selectable.select_from(selectable) + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] + count_selectable = count_selectable.select_from(selectable) # type: ignore[arg-type] try: if execution_engine.dialect_name == GXSqlDialect.MSSQL: - with execution_engine.get_connection() as connection: + with execution_engine.get_connection() as connection: # type: ignore[var-annotated] if not connection.closed: temp_table_obj = _generate_temp_table( connection=connection, @@ -325,22 +325,22 @@ def _sqlalchemy_map_condition_unexpected_count_value( 
metrics=metrics, ) inner_case_query: sqlalchemy.Insert = temp_table_obj.insert().from_select( - [count_case_statement], + [count_case_statement], # type: ignore[list-item] count_selectable, ) - execution_engine.execute_query_in_transaction(inner_case_query) + execution_engine.execute_query_in_transaction(inner_case_query) # type: ignore[arg-type] - count_selectable = temp_table_obj + count_selectable = temp_table_obj # type: ignore[assignment] - count_selectable = get_sqlalchemy_selectable(count_selectable) + count_selectable = get_sqlalchemy_selectable(count_selectable) # type: ignore[assignment] unexpected_count_query: sqlalchemy.Select = ( - sa.select( + sa.select( # type: ignore[assignment] sa.func.sum(sa.column("condition")).label("unexpected_count"), ) - .select_from(count_selectable) + .select_from(count_selectable) # type: ignore[arg-type] .alias("UnexpectedCountSubquery") ) - unexpected_count: Union[float, int] = execution_engine.execute_query( + unexpected_count: Union[float, int] = execution_engine.execute_query( # type: ignore[assignment] sa.select( unexpected_count_query.c[ f"{SummarizationMetricNameSuffixes.UNEXPECTED_COUNT.value}" @@ -384,11 +384,11 @@ def _sqlalchemy_map_condition_rows( selectable = execution_engine.get_domain_records(domain_kwargs=domain_kwargs) table_columns: list[str] = metrics["table.columns"] - column_selector = [sa.column(column_name) for column_name in table_columns] + column_selector = [sa.column(column_name) for column_name in table_columns] # type: ignore[var-annotated] query = sa.select(*column_selector).where(unexpected_condition) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) - query = query.select_from(selectable) + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] + query = query.select_from(selectable) # type: ignore[arg-type] result_format = metric_value_kwargs["result_format"] if result_format["result_format"] != 
"COMPLETE": @@ -461,12 +461,12 @@ def _sqlalchemy_map_condition_query( # noqa: C901 - too complex f"Please check your configuration and try again." ) - column_selector.append(sa.column(column_name)) + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] for column_name in domain_column_name_list: - column_selector.append(sa.column(column_name)) + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] - unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( + unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( # type: ignore[valid-type] unexpected_condition ) source_table_and_schema: sa.Table = get_sqlalchemy_source_table_and_schema(execution_engine) @@ -474,8 +474,8 @@ def _sqlalchemy_map_condition_query( # noqa: C901 - too complex source_table_and_schema_as_selectable: Union[sa.Table, sa.Select] = get_sqlalchemy_selectable( source_table_and_schema ) - final_select_statement: sa.select = ( - unexpected_condition_query_with_selected_columns.select_from( + final_select_statement: sa.select = ( # type: ignore[valid-type] + unexpected_condition_query_with_selected_columns.select_from( # type: ignore[attr-defined] source_table_and_schema_as_selectable ) ) @@ -543,27 +543,27 @@ def _sqlalchemy_map_condition_index( # noqa: C901 - too complex message=f'Error: The unexpected_index_column: "{column_name}" in does not exist in SQL Table. ' # noqa: E501 f"Please check your configuration and try again." 
) - column_selector.append(sa.column(column_name)) + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] # the last column we SELECT is the column the Expectation is being run on for column_name in domain_column_name_list: - column_selector.append(sa.column(column_name)) + column_selector.append(sa.column(column_name)) # type: ignore[arg-type] domain_records_as_selectable: sa.sql.Selectable = execution_engine.get_domain_records( domain_kwargs=domain_kwargs ) - unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( + unexpected_condition_query_with_selected_columns: sa.select = sa.select(*column_selector).where( # type: ignore[valid-type] unexpected_condition ) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - domain_records_as_selectable = get_sqlalchemy_selectable(domain_records_as_selectable) + domain_records_as_selectable = get_sqlalchemy_selectable(domain_records_as_selectable) # type: ignore[arg-type] # since SQL tables can be **very** large, truncate query_result values at 20, or at `partial_unexpected_count` # noqa: E501 - final_query: sa.select = unexpected_condition_query_with_selected_columns.select_from( + final_query: sa.select = unexpected_condition_query_with_selected_columns.select_from( # type: ignore[valid-type,attr-defined] domain_records_as_selectable ).limit(result_format["partial_unexpected_count"]) - query_result: List[sqlalchemy.Row] = execution_engine.execute_query(final_query).fetchall() + query_result: List[sqlalchemy.Row] = execution_engine.execute_query(final_query).fetchall() # type: ignore[assignment] exclude_unexpected_values: bool = result_format.get("exclude_unexpected_values", False) diff --git a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py index f38b255c23e5..098968e28ef0 100644 --- 
a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_condition_partial.py @@ -175,7 +175,7 @@ def inner_func( # noqa: PLR0913 sqlalchemy_engine: sqlalchemy.Engine = execution_engine.engine - column_selector = [sa.column(column_name) for column_name in column_list] + column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] dialect = execution_engine.dialect_module expected_condition = metric_fn( cls, diff --git a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py index e0b34c8d5541..8034d25102bd 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py +++ b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_function_partial.py @@ -164,7 +164,7 @@ def inner_func( # noqa: PLR0913 sqlalchemy_engine: sqlalchemy.Engine = execution_engine.engine - column_selector = [sa.column(column_name) for column_name in column_list] + column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] dialect = execution_engine.dialect_module multicolumn_function = metric_fn( cls, diff --git a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py index cbd658b902c7..6a43447d7040 100644 --- a/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py +++ b/great_expectations/expectations/metrics/map_metric_provider/multicolumn_map_condition_auxilliary_methods.py @@ -152,11 +152,11 @@ def _sqlalchemy_multicolumn_map_condition_values( column_list: List[Union[str, 
sqlalchemy.quoted_name]] = accessor_domain_kwargs["column_list"] - column_selector = [sa.column(column_name) for column_name in column_list] + column_selector = [sa.column(column_name) for column_name in column_list] # type: ignore[var-annotated] query = sa.select(*column_selector).where(boolean_mapped_unexpected_values) if not _is_sqlalchemy_metric_selectable(map_metric_provider=cls): - selectable = get_sqlalchemy_selectable(selectable) - query = query.select_from(selectable) + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] + query = query.select_from(selectable) # type: ignore[arg-type] result_format = metric_value_kwargs["result_format"] if result_format["result_format"] != "COMPLETE": @@ -195,10 +195,10 @@ def _sqlalchemy_multicolumn_map_condition_filtered_row_count( """ # noqa: E501 ) - selectable = get_sqlalchemy_selectable(selectable) + selectable = get_sqlalchemy_selectable(selectable) # type: ignore[arg-type] return execution_engine.execute_query( - sa.select(sa.func.count()).select_from(selectable) + sa.select(sa.func.count()).select_from(selectable) # type: ignore[arg-type] ).scalar() diff --git a/great_expectations/expectations/metrics/query_metrics/query_column.py b/great_expectations/expectations/metrics/query_metrics/query_column.py index 67263d2845f6..a322855e2cd3 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_column.py +++ b/great_expectations/expectations/metrics/query_metrics/query_column.py @@ -60,7 +60,7 @@ def _sqlalchemy( else: query = query.format(col=column, batch=f"({selectable})") - result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() + result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() # type: ignore[assignment,arg-type] return [element._asdict() for element in result] diff --git a/great_expectations/expectations/metrics/query_metrics/query_column_pair.py 
b/great_expectations/expectations/metrics/query_metrics/query_column_pair.py index 20f945396959..428b27baf4ce 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_column_pair.py +++ b/great_expectations/expectations/metrics/query_metrics/query_column_pair.py @@ -63,7 +63,7 @@ def _sqlalchemy( else: query = query.format(column_A=column_A, column_B=column_B, batch=f"({selectable})") - result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() + result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() # type: ignore[assignment,arg-type] return [element._asdict() for element in result] diff --git a/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py b/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py index 7f30ae2f221e..19c5ebff27bb 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py +++ b/great_expectations/expectations/metrics/query_metrics/query_multiple_columns.py @@ -76,7 +76,7 @@ def _sqlalchemy( batch=f"({selectable})", ) - result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() + result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() # type: ignore[assignment,arg-type] return [element._asdict() for element in result] diff --git a/great_expectations/expectations/metrics/query_metrics/query_table.py b/great_expectations/expectations/metrics/query_metrics/query_table.py index eec01abe8083..8bf1e7362002 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_table.py +++ b/great_expectations/expectations/metrics/query_metrics/query_table.py @@ -56,7 +56,7 @@ def _sqlalchemy( else: query = query.format(batch=f"({selectable})") - result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() + result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() 
# type: ignore[assignment,arg-type] return [element._asdict() for element in result] # diff --git a/great_expectations/expectations/metrics/query_metrics/query_template_values.py b/great_expectations/expectations/metrics/query_metrics/query_template_values.py index 88dc243201e0..91c176f4d1ec 100644 --- a/great_expectations/expectations/metrics/query_metrics/query_template_values.py +++ b/great_expectations/expectations/metrics/query_metrics/query_template_values.py @@ -86,7 +86,7 @@ def _sqlalchemy( query = cls.get_query(query, template_dict, f"({selectable})") try: - result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() + result: List[sqlalchemy.Row] = execution_engine.execute_query(sa.text(query)).fetchall() # type: ignore[assignment,arg-type] except Exception as e: if hasattr(e, "_query_id"): # query_id removed because it duplicates the validation_results diff --git a/great_expectations/expectations/metrics/table_metrics/table_column_types.py b/great_expectations/expectations/metrics/table_metrics/table_column_types.py index ae17732b7011..729487d4a443 100644 --- a/great_expectations/expectations/metrics/table_metrics/table_column_types.py +++ b/great_expectations/expectations/metrics/table_metrics/table_column_types.py @@ -92,12 +92,12 @@ def _get_sqlalchemy_column_metadata( ): table_selectable: str | sqlalchemy.TextClause - if sqlalchemy.Table and isinstance(batch_data.selectable, sqlalchemy.Table): + if sqlalchemy.Table and isinstance(batch_data.selectable, sqlalchemy.Table): # type: ignore[truthy-function] table_selectable = batch_data.source_table_name or batch_data.selectable.name schema_name = batch_data.source_schema_name or batch_data.selectable.schema # if custom query was passed in - elif sqlalchemy.TextClause and isinstance(batch_data.selectable, sqlalchemy.TextClause): + elif sqlalchemy.TextClause and isinstance(batch_data.selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] table_selectable = 
batch_data.selectable schema_name = None else: @@ -106,7 +106,7 @@ def _get_sqlalchemy_column_metadata( return get_sqlalchemy_column_metadata( execution_engine=execution_engine, - table_selectable=table_selectable, + table_selectable=table_selectable, # type: ignore[arg-type] schema_name=schema_name, ) diff --git a/great_expectations/expectations/metrics/table_metrics/table_head.py b/great_expectations/expectations/metrics/table_metrics/table_head.py index 019b9cfd2988..d66c628d2f71 100644 --- a/great_expectations/expectations/metrics/table_metrics/table_head.py +++ b/great_expectations/expectations/metrics/table_metrics/table_head.py @@ -75,10 +75,10 @@ def _sqlalchemy( if metric_value_kwargs["fetch_all"]: limit = None - selectable = sa.select("*").select_from(selectable).limit(limit).selectable + selectable = sa.select("*").select_from(selectable).limit(limit).selectable # type: ignore[assignment,arg-type] try: - with execution_engine.get_connection() as con: + with execution_engine.get_connection() as con: # type: ignore[var-annotated] df = pandas_read_sql( sql=selectable, con=con, diff --git a/great_expectations/expectations/metrics/util.py b/great_expectations/expectations/metrics/util.py index 9d8d3152cb9e..24ca15db4e15 100644 --- a/great_expectations/expectations/metrics/util.py +++ b/great_expectations/expectations/metrics/util.py @@ -41,7 +41,7 @@ import psycopg2 # noqa: F401 import sqlalchemy.dialects.postgresql.psycopg2 as sqlalchemy_psycopg2 # noqa: TID251 except (ImportError, KeyError): - sqlalchemy_psycopg2 = None + sqlalchemy_psycopg2 = None # type: ignore[assignment] try: import snowflake @@ -373,7 +373,7 @@ def get_sqlalchemy_column_metadata( inspector = execution_engine.get_inspector() try: # if a custom query was passed - if sqlalchemy.TextClause and isinstance(table_selectable, sqlalchemy.TextClause): + if sqlalchemy.TextClause and isinstance(table_selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] if hasattr(table_selectable, 
"selected_columns"): # New in version 1.4. columns = table_selectable.selected_columns.columns @@ -386,7 +386,7 @@ def get_sqlalchemy_column_metadata( table_name = str(table_selectable) if execution_engine.dialect_name == GXSqlDialect.SNOWFLAKE: table_name = table_name.lower() - columns = inspector.get_columns( + columns = inspector.get_columns( # type: ignore[assignment] table_name=table_name, schema=schema_name, ) @@ -447,7 +447,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 if dialect.name.lower() == "mssql": # Get column names and types from the database # Reference: https://dataedo.com/kb/query/sql-server/list-table-columns-in-database - tables_table_clause: sqlalchemy.TableClause = sa.table( + tables_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] "tables", sa.column("object_id"), sa.column("schema_id"), @@ -455,7 +455,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="sys", ).alias("sys_tables_table_clause") tables_table_query: sqlalchemy.Select = ( - sa.select( + sa.select( # type: ignore[assignment] tables_table_clause.columns.object_id.label("object_id"), sa.func.schema_name(tables_table_clause.columns.schema_id).label("schema_name"), tables_table_clause.columns.name.label("table_name"), @@ -463,7 +463,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 .select_from(tables_table_clause) .alias("sys_tables_table_subquery") ) - columns_table_clause: sqlalchemy.TableClause = sa.table( + columns_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] "columns", sa.column("object_id"), sa.column("user_type_id"), @@ -474,7 +474,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="sys", ).alias("sys_columns_table_clause") columns_table_query: sqlalchemy.Select = ( - sa.select( + sa.select( # type: ignore[assignment] columns_table_clause.columns.object_id.label("object_id"), 
columns_table_clause.columns.user_type_id.label("user_type_id"), columns_table_clause.columns.column_id.label("column_id"), @@ -485,24 +485,24 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 .select_from(columns_table_clause) .alias("sys_columns_table_subquery") ) - types_table_clause: sqlalchemy.TableClause = sa.table( + types_table_clause: sqlalchemy.TableClause = sa.table( # type: ignore[assignment] "types", sa.column("user_type_id"), sa.column("name"), schema="sys", ).alias("sys_types_table_clause") types_table_query: sqlalchemy.Select = ( - sa.select( + sa.select( # type: ignore[assignment] types_table_clause.columns.user_type_id.label("user_type_id"), types_table_clause.columns.name.label("column_data_type"), ) .select_from(types_table_clause) .alias("sys_types_table_subquery") ) - inner_join_conditions: sqlalchemy.BinaryExpression = sa.and_( + inner_join_conditions: sqlalchemy.BinaryExpression = sa.and_( # type: ignore[assignment] *(tables_table_query.c.object_id == columns_table_query.c.object_id,) ) - outer_join_conditions: sqlalchemy.BinaryExpression = sa.and_( + outer_join_conditions: sqlalchemy.BinaryExpression = sa.and_( # type: ignore[assignment] *( columns_table_query.columns.user_type_id == types_table_query.columns.user_type_id, @@ -519,7 +519,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 columns_table_query.c.column_precision, ) .select_from( - tables_table_query.join( + tables_table_query.join( # type: ignore[call-arg,arg-type] right=columns_table_query, onclause=inner_join_conditions, isouter=False, @@ -529,14 +529,14 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 isouter=True, ) ) - .where(tables_table_query.c.table_name == selectable.name) + .where(tables_table_query.c.table_name == selectable.name) # type: ignore[attr-defined] .order_by( tables_table_query.c.schema_name.asc(), tables_table_query.c.table_name.asc(), columns_table_query.c.column_id.asc(), ) ) - col_info_tuples_list: 
List[tuple] = connection.execute(col_info_query).fetchall() + col_info_tuples_list: List[tuple] = connection.execute(col_info_query).fetchall() # type: ignore[assignment] # type_module = _get_dialect_type_module(dialect=dialect) col_info_dict_list = [ { @@ -548,7 +548,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 ] elif dialect.name.lower() == "trino": try: - table_name = selectable.name + table_name = selectable.name # type: ignore[attr-defined] except AttributeError: table_name = selectable if str(table_name).lower().startswith("select"): @@ -564,7 +564,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="information_schema", ) tables_table_query = ( - sa.select( + sa.select( # type: ignore[assignment] sa.column("table_schema").label("schema_name"), sa.column("table_name").label("table_name"), ) @@ -577,7 +577,7 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 schema="information_schema", ) columns_table_query = ( - sa.select( + sa.select( # type: ignore[assignment] sa.column("column_name").label("column_name"), sa.column("table_name").label("table_name"), sa.column("table_schema").label("schema_name"), @@ -593,14 +593,14 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 ) ) col_info_query = ( - sa.select( + sa.select( # type: ignore[assignment] tables_table_query.c.schema_name, tables_table_query.c.table_name, columns_table_query.c.column_name, columns_table_query.c.column_data_type, ) .select_from( - tables_table_query.join( + tables_table_query.join( # type: ignore[call-arg,arg-type] right=columns_table_query, onclause=conditions, isouter=False ) ) @@ -622,9 +622,9 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 # in sqlalchemy > 2.0.0 this is a Subquery, which we need to convert into a Selectable if not col_info_query.supports_execution: - col_info_query = sa.select(col_info_query) + col_info_query = sa.select(col_info_query) # type: ignore[call-overload] - 
col_info_tuples_list = connection.execute(col_info_query).fetchall() + col_info_tuples_list = connection.execute(col_info_query).fetchall() # type: ignore[assignment] # type_module = _get_dialect_type_module(dialect=dialect) col_info_dict_list = [ { @@ -635,21 +635,21 @@ def column_reflection_fallback( # noqa: C901, PLR0912, PLR0915 ] else: # if a custom query was passed - if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): + if sqlalchemy.TextClause and isinstance(selectable, sqlalchemy.TextClause): # type: ignore[truthy-function] query: sqlalchemy.TextClause = selectable - elif sqlalchemy.Table and isinstance(selectable, sqlalchemy.Table): + elif sqlalchemy.Table and isinstance(selectable, sqlalchemy.Table): # type: ignore[truthy-function] query = sa.select(sa.text("*")).select_from(selectable).limit(1) else: # noqa: PLR5501 # noinspection PyUnresolvedReferences if dialect.name.lower() == GXSqlDialect.REDSHIFT: # Redshift needs temp tables to be declared as text - query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) + query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) # type: ignore[assignment,arg-type] else: - query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) + query = sa.select(sa.text("*")).select_from(sa.text(selectable)).limit(1) # type: ignore[assignment,arg-type] result_object = connection.execute(query) # noinspection PyProtectedMember - col_names: List[str] = result_object._metadata.keys + col_names: List[str] = result_object._metadata.keys # type: ignore[assignment] col_info_dict_list = [{"name": col_name} for col_name in col_names] return col_info_dict_list @@ -1151,7 +1151,7 @@ def sql_statement_with_post_compile_to_string( String representation of select_statement """ # noqa: E501 - sqlalchemy_connection: sa.engine.base.Connection = engine.engine + sqlalchemy_connection: sa.engine.base.Connection = engine.engine # type: ignore[assignment] compiled = 
select_statement.compile( sqlalchemy_connection, compile_kwargs={"render_postcompile": True}, @@ -1160,7 +1160,7 @@ def sql_statement_with_post_compile_to_string( dialect_name: str = engine.dialect_name if dialect_name in ["sqlite", "trino", "mssql"]: - params = (repr(compiled.params[name]) for name in compiled.positiontup) + params = (repr(compiled.params[name]) for name in compiled.positiontup) # type: ignore[union-attr] query_as_string = re.sub(r"\?", lambda m: next(params), str(compiled)) else: diff --git a/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py b/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py index 7d5a66353079..5c2ac56a7a22 100644 --- a/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py +++ b/great_expectations/experimental/rule_based_profiler/attributed_resolved_metrics.py @@ -34,7 +34,7 @@ def _detect_illegal_array_type_or_shape(values: MetricValues) -> bool: # noqa: properties=( pd.DataFrame, pd.Series, - sqlalchemy.Row if sqlalchemy.Row else None, + sqlalchemy.Row if sqlalchemy.Row else None, # type: ignore[truthy-function] pyspark.Row if pyspark.Row else None, # type: ignore[truthy-function] set, ) diff --git a/great_expectations/self_check/util.py b/great_expectations/self_check/util.py index b11682b3279f..8226cd9556f8 100644 --- a/great_expectations/self_check/util.py +++ b/great_expectations/self_check/util.py @@ -151,8 +151,8 @@ "NUMERIC": postgresqltypes.NUMERIC, } except (ImportError, KeyError): - postgresqltypes = None - pgDialect = None + postgresqltypes = None # type: ignore[assignment] + pgDialect = None # type: ignore[assignment] POSTGRESQL_TYPES = {} try: @@ -176,8 +176,8 @@ "TINYINT": mysqltypes.TINYINT, } except (ImportError, KeyError): - mysqltypes = None - mysqlDialect = None + mysqltypes = None # type: ignore[assignment] + mysqlDialect = None # type: ignore[assignment] MYSQL_TYPES = {} try: @@ -189,9 +189,9 @@ from 
sqlalchemy.dialects.mssql import dialect as mssqlDialect # noqa: TID251 try: - mssqltypes.INT # noqa: B018 # reassigning if attr not found + mssqltypes.INT # type: ignore[attr-defined] # noqa: B018 # reassigning if attr not found except AttributeError: - mssqltypes.INT = mssqltypes.INTEGER + mssqltypes.INT = mssqltypes.INTEGER # type: ignore[attr-defined] # noinspection PyUnresolvedReferences MSSQL_TYPES = { @@ -206,7 +206,7 @@ "DECIMAL": mssqltypes.DECIMAL, "FLOAT": mssqltypes.FLOAT, "IMAGE": mssqltypes.IMAGE, - "INT": mssqltypes.INT, + "INT": mssqltypes.INT, # type: ignore[attr-defined] "INTEGER": mssqltypes.INTEGER, "MONEY": mssqltypes.MONEY, "NCHAR": mssqltypes.NCHAR, @@ -227,8 +227,8 @@ "VARCHAR": mssqltypes.VARCHAR, } except (ImportError, KeyError): - mssqltypes = None - mssqlDialect = None + mssqltypes = None # type: ignore[assignment] + mssqlDialect = None # type: ignore[assignment] MSSQL_TYPES = {} @@ -740,13 +740,13 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 try: dialect_classes["postgresql"] = postgresqltypes.dialect - dialect_types["postgresql"] = POSTGRESQL_TYPES + dialect_types["postgresql"] = POSTGRESQL_TYPES # type: ignore[assignment] except AttributeError: pass try: dialect_classes["mysql"] = mysqltypes.dialect - dialect_types["mysql"] = MYSQL_TYPES + dialect_types["mysql"] = MYSQL_TYPES # type: ignore[assignment] except AttributeError: pass @@ -841,7 +841,7 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 if ( schemas and sa_engine_name in schemas - and isinstance(engine.dialect, dialect_classes[sa_engine_name]) + and isinstance(engine.dialect, dialect_classes[sa_engine_name]) # type: ignore[union-attr] ): schema = schemas[sa_engine_name] if pk_column: @@ -894,7 +894,7 @@ def build_sa_validator_with_data( # noqa: C901, PLR0912, PLR0913, PLR0915 execution_engine = SqlAlchemyExecutionEngine(caching=caching, engine=engine) batch_data = SqlAlchemyBatchData(execution_engine=execution_engine, 
table_name=table_name) - with execution_engine.get_connection() as connection: + with execution_engine.get_connection() as connection: # type: ignore[var-annotated] _debug("Calling df.to_sql") _start = time.time() add_dataframe_to_db( diff --git a/great_expectations/util.py b/great_expectations/util.py index fba7e4e2f716..627c3dbe502f 100644 --- a/great_expectations/util.py +++ b/great_expectations/util.py @@ -1021,11 +1021,11 @@ def get_sqlalchemy_selectable( https://docs.sqlalchemy.org/en/14/changelog/migration_14.html#change-4617 """ # noqa: E501 - if sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select): + if sqlalchemy.Select and isinstance(selectable, sqlalchemy.Select): # type: ignore[truthy-function] if version.parse(sa.__version__) >= version.parse("1.4"): - selectable = selectable.subquery() + selectable = selectable.subquery() # type: ignore[assignment] else: - selectable = selectable.alias() + selectable = selectable.alias() # type: ignore[assignment] return selectable @@ -1327,10 +1327,10 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 return dict(data) # sqlalchemy text for SqlAlchemy 2 compatibility - if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): + if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): # type: ignore[truthy-function] return str(data) - if Row and isinstance(data, Row): + if Row and isinstance(data, Row): # type: ignore[truthy-function] return str(data) if isinstance(data, decimal.Decimal): @@ -1345,7 +1345,7 @@ def convert_to_json_serializable( # noqa: C901, PLR0911, PLR0912 if pyspark.types and isinstance(data, pyspark.types.StructType): return dict(data.jsonValue()) - if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): + if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): # type: ignore[truthy-function] # Connection is a module, which is non-serializable. Return module name instead. 
return "sqlalchemy.engine.base.Connection" @@ -1460,11 +1460,11 @@ def ensure_json_serializable(data: Any) -> None: # noqa: C901, PLR0911, PLR0912 if isinstance(data, RunIdentifier): return - if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): + if sqlalchemy.TextClause and isinstance(data, sqlalchemy.TextClause): # type: ignore[truthy-function] # TextClause is handled manually by convert_to_json_serializable() return - if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): + if sqlalchemy.Connection and isinstance(data, sqlalchemy.Connection): # type: ignore[truthy-function] # Connection module is handled manually by convert_to_json_serializable() return diff --git a/reqs/requirements-dev-sqlalchemy.txt b/reqs/requirements-dev-sqlalchemy.txt index e7ea6b56f6b1..6445dbb7fa99 100644 --- a/reqs/requirements-dev-sqlalchemy.txt +++ b/reqs/requirements-dev-sqlalchemy.txt @@ -1,9 +1,8 @@ # Do not `pip install -r` this file locally or in CI (outside of StaticTypeCheck). # It only exists to be included in requirements-dev.txt, which should only be used in the Dockerfile.tests file. 
- ---requirement requirements-dev-athena.txt --requirement requirements-dev-lite.txt +--requirement requirements-dev-athena.txt --requirement requirements-dev-sqlalchemy1.txt --requirement requirements-dev-bigquery.txt --requirement requirements-dev-dremio.txt diff --git a/reqs/requirements-dev-sqlalchemy2.txt b/reqs/requirements-dev-sqlalchemy2.txt new file mode 100644 index 000000000000..bec49f805b22 --- /dev/null +++ b/reqs/requirements-dev-sqlalchemy2.txt @@ -0,0 +1,17 @@ +# TODO: remove this file once SQLAlchemy 2 is the only supported version +--requirement requirements-dev-databricks.txt +--requirement requirements-dev-mssql.txt +--requirement requirements-dev-mysql.txt +--requirement requirements-dev-postgresql.txt +--requirement requirements-dev-trino.txt +--requirement requirements-dev-hive.txt +--requirement requirements-dev-vertica.txt +# Do not `pip install -r` this file locally or in CI (outside of StaticTypeCheck). +# It only exists to be included in requirements-dev.txt, which should only be used in the Dockerfile.tests file. 
+# Do not add any dependencies here that are not compatible with sqlalchemy>=2.0 +# ----------- +# Temporary pins for type checking step +# TODO: update these pins in their respective requirements files and remove from here +snowflake-sqlalchemy>=1.6 # min version required for sqlalchemy 2.0 +sqlalchemy>=2.0 +sqlalchemy-bigquery>=1.11.0 # min version required for sqlalchemy 2.0 diff --git a/requirements-types.txt b/requirements-types.txt index 7e6dd9a709ce..5dca3f4883b6 100644 --- a/requirements-types.txt +++ b/requirements-types.txt @@ -1,12 +1,15 @@ +--requirement reqs/requirements-dev-sqlalchemy2.txt --requirement requirements.txt +--requirement reqs/requirements-dev-lite.txt --requirement reqs/requirements-dev-spark.txt --requirement reqs/requirements-dev-azure.txt --requirement reqs/requirements-dev-contrib.txt --requirement reqs/requirements-dev-cloud.txt ---requirement reqs/requirements-dev-sqlalchemy.txt --requirement docs/sphinx_api_docs_source/requirements-dev-api-docs.txt -# typing stubs pandas-stubs +# force sqlalchemy 2 because it exports types +sqlalchemy>2.0 +# typing stubs types-beautifulsoup4 types-colorama types-decorator diff --git a/setup.py b/setup.py index 68d153ac50e9..056c0dbb8c3d 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,6 @@ def get_extras_require(): "bigquery", # https://github.com/googleapis/python-bigquery-sqlalchemy/blob/main/setup.py "clickhouse", # https://github.com/xzkostyan/clickhouse-sqlalchemy/blob/master/setup.py "redshift", # https://github.com/sqlalchemy-redshift/sqlalchemy-redshift/blob/main/setup.py - "snowflake", # https://github.com/snowflakedb/snowflake-sqlalchemy/blob/main/setup.cfg "teradata", # https://pypi.org/project/teradatasqlalchemy https://support.teradata.com/knowledge?id=kb_article_view&sys_kb_id=a5a869149729251ced863fe3f153af27 ) sqla_keys = ( @@ -46,9 +45,11 @@ def get_extras_require(): "trino", # https://github.com/trinodb/trino-python-client/blob/master/setup.py "vertica", # 
https://github.com/bluelabsio/sqlalchemy-vertica-python/blob/master/setup.py "databricks", # https://github.com/databricks/databricks-sql-python/blob/main/pyproject.toml + "snowflake", # https://github.com/snowflakedb/snowflake-sqlalchemy/blob/main/setup.cfg ) ignore_keys = ( "sqlalchemy", + "sqlalchemy2", "test", "tools", "all-contrib-expectations", diff --git a/tests/conftest.py b/tests/conftest.py index 219489b3b87c..13806119a73a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1454,7 +1454,7 @@ def evr_success(): @pytest.fixture -def sqlite_view_engine(test_backends) -> Engine: +def sqlite_view_engine(test_backends) -> Engine: # type: ignore[return] # Create a small in-memory engine with two views, one of which is temporary if "sqlite" in test_backends: try: @@ -1479,7 +1479,7 @@ def sqlite_view_engine(test_backends) -> Engine: ) return sqlite_engine except ImportError: - sa = None + sa = None # type: ignore[assignment] else: pytest.skip("SqlAlchemy tests disabled; not testing views") diff --git a/tests/datasource/fluent/conftest.py b/tests/datasource/fluent/conftest.py index 124750090046..929a620aead7 100644 --- a/tests/datasource/fluent/conftest.py +++ b/tests/datasource/fluent/conftest.py @@ -99,7 +99,7 @@ def __init__(self, create_temp_table: bool = True, *args, **kwargs): # We should likely let the user pass in an engine. 
In a SqlAlchemyExecutionEngine used in # noqa: E501 # non-mocked code the engine property is of the type: # from sqlalchemy.engine import Engine as SaEngine - self.engine = MockSaEngine(dialect=Dialect(dialect)) + self.engine = MockSaEngine(dialect=Dialect(dialect)) # type: ignore[assignment] self._create_temp_table = create_temp_table @override diff --git a/tests/datasource/fluent/integration/test_sql_datasources.py b/tests/datasource/fluent/integration/test_sql_datasources.py index df44a4ada188..a138d2f88899 100644 --- a/tests/datasource/fluent/integration/test_sql_datasources.py +++ b/tests/datasource/fluent/integration/test_sql_datasources.py @@ -232,9 +232,9 @@ def _table_factory( ) created_tables: list[dict[Literal["table_name", "schema"], str | None]] = [] - with gx_engine.get_connection() as conn: - quoted_upper_col: str = quote_str(QUOTED_UPPER_COL, dialect=sa_engine.dialect.name) - quoted_lower_col: str = quote_str(QUOTED_LOWER_COL, dialect=sa_engine.dialect.name) + with gx_engine.get_connection() as conn: # type: ignore[var-annotated] + quoted_upper_col: str = quote_str(QUOTED_UPPER_COL, dialect=sa_engine.dialect.name) # type: ignore[arg-type] + quoted_lower_col: str = quote_str(QUOTED_LOWER_COL, dialect=sa_engine.dialect.name) # type: ignore[arg-type] transaction = conn.begin() if schema: conn.execute(TextClause(f"CREATE SCHEMA IF NOT EXISTS {schema}")) @@ -703,7 +703,7 @@ def _raw_query_check_column_exists( gx_execution_engine: SqlAlchemyExecutionEngine, ) -> bool: """Use a simple 'SELECT {column_name_param} from {qualified_table_name};' query to check if the column exists.'""" # noqa: E501 - with gx_execution_engine.get_connection() as connection: + with gx_execution_engine.get_connection() as connection: # type: ignore[var-annotated] query = f"""SELECT {column_name_param} FROM {qualified_table_name} LIMIT 1;""" print(f"query:\n {query}") # an exception will be raised if the column does not exist @@ -772,7 +772,7 @@ def test_column_expectation( 
datasource = all_sql_datasources dialect = datasource.get_engine().dialect.name - if _is_quote_char_dialect_mismatch(dialect, column_name): + if _is_quote_char_dialect_mismatch(dialect, column_name): # type: ignore[arg-type] pytest.skip(f"quote char dialect mismatch: {column_name[0]}") if _requires_fix(param_id): diff --git a/tests/execution_engine/test_sqlalchemy_execution_engine.py b/tests/execution_engine/test_sqlalchemy_execution_engine.py index 692e3bf22eba..936f3765dd04 100644 --- a/tests/execution_engine/test_sqlalchemy_execution_engine.py +++ b/tests/execution_engine/test_sqlalchemy_execution_engine.py @@ -1141,7 +1141,7 @@ def test_same_connection_used_from_static_pool_sqlite(self, sa, pd_dataframe: pd temporary table wouldn't be accessible. """ # noqa: E501 execution_engine = SqlAlchemyExecutionEngine(connection_string="sqlite://") - with execution_engine.get_connection() as con: + with execution_engine.get_connection() as con: # type: ignore[var-annotated] add_dataframe_to_db(df=pd_dataframe, name="test", con=con, index=False) assert ( @@ -1173,7 +1173,7 @@ def test_same_connection_accessible_from_execution_engine_sqlite( from the pool). The same connection should be accessible from the execution engine after each query. 
""" # noqa: E501 execution_engine = SqlAlchemyExecutionEngine(connection_string="sqlite://") - with execution_engine.get_connection() as con: + with execution_engine.get_connection() as con: # type: ignore[var-annotated] add_dataframe_to_db(df=pd_dataframe, name="test", con=con, index=False) connection = con assert ( @@ -1183,17 +1183,17 @@ def test_same_connection_accessible_from_execution_engine_sqlite( create_temp_table = "CREATE TEMPORARY TABLE temp_table AS SELECT * FROM test;" execution_engine.execute_query_in_transaction(sa.text(create_temp_table)) - with execution_engine.get_connection() as test_con: + with execution_engine.get_connection() as test_con: # type: ignore[var-annotated] assert connection == test_con select_temp_table = "SELECT * FROM temp_table;" execution_engine.execute_query(sa.text(select_temp_table)).fetchall() - with execution_engine.get_connection() as test_con: + with execution_engine.get_connection() as test_con: # type: ignore[var-annotated] assert connection == test_con execution_engine.execute_query(sa.text(select_temp_table)).fetchall() - with execution_engine.get_connection() as test_con: + with execution_engine.get_connection() as test_con: # type: ignore[var-annotated] assert connection == test_con def test_get_connection_doesnt_close_on_exit_sqlite(self, sa): diff --git a/tests/integration/cloud/end_to_end/conftest.py b/tests/integration/cloud/end_to_end/conftest.py index f40c4a14dd51..9a3d669b04d1 100644 --- a/tests/integration/cloud/end_to_end/conftest.py +++ b/tests/integration/cloud/end_to_end/conftest.py @@ -157,7 +157,7 @@ def _table_factory( ) created_tables: list[dict[Literal["table_name", "schema_name"], str | None]] = [] - with gx_engine.get_connection() as conn: + with gx_engine.get_connection() as conn: # type: ignore[var-annotated] transaction = conn.begin() if schema_name: conn.execute(TextClause(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")) diff --git a/tests/test_definitions/test_expectations_v3_api.py 
b/tests/test_definitions/test_expectations_v3_api.py index 0ae7fd9dc236..10fdac7662ea 100644 --- a/tests/test_definitions/test_expectations_v3_api.py +++ b/tests/test_definitions/test_expectations_v3_api.py @@ -33,7 +33,7 @@ try: sqliteDialect = sqlalchemy.sqlite.dialect except (ImportError, AttributeError): - sqliteDialect = SQLALCHEMY_NOT_IMPORTED + sqliteDialect = SQLALCHEMY_NOT_IMPORTED # type: ignore[assignment] def pytest_generate_tests(metafunc): # noqa C901 - 35