Skip to content

Commit

Permalink
Add support for semicolon stripping to DbApiHook, PrestoHook, and TrinoHook (apache#41916)
Browse files Browse the repository at this point in the history



---------

Co-authored-by: Elad Kalif <45845474+eladkal@users.noreply.github.com>
  • Loading branch information
Illumaria and eladkal authored Nov 13, 2024
1 parent 72d0b15 commit e7194df
Show file tree
Hide file tree
Showing 37 changed files with 100 additions and 63 deletions.
2 changes: 1 addition & 1 deletion dev/breeze/src/airflow_breeze/global_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -568,7 +568,7 @@ def get_airflow_extras():
# END OF EXTRAS LIST UPDATED BY PRE COMMIT
]

CHICKEN_EGG_PROVIDERS = " ".join(["standard amazon"])
CHICKEN_EGG_PROVIDERS = " ".join(["standard amazon common.sql"])


BASE_PROVIDERS_COMPATIBILITY_CHECKS: list[dict[str, str | list[str]]] = [
Expand Down
4 changes: 2 additions & 2 deletions dev/breeze/tests/test_packages.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def test_get_documentation_package_path():
"postgres",
"beta0",
"""
"apache-airflow-providers-common-sql>=1.17.0b0",
"apache-airflow-providers-common-sql>=1.20.0b0",
"apache-airflow>=2.8.0b0",
"psycopg2-binary>=2.9.4",
""",
Expand All @@ -219,7 +219,7 @@ def test_get_documentation_package_path():
"postgres",
"",
"""
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"psycopg2-binary>=2.9.4",
""",
Expand Down
54 changes: 27 additions & 27 deletions generated/provider_dependencies.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"deps": [
"PyAthena>=3.0.10",
"apache-airflow-providers-common-compat>=1.2.1",
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow-providers-http",
"apache-airflow>=2.8.0",
"asgiref>=2.3.0",
Expand Down Expand Up @@ -102,7 +102,7 @@
},
"apache.drill": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"sqlalchemy-drill>=1.1.0"
],
Expand All @@ -116,7 +116,7 @@
},
"apache.druid": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pydruid>=0.4.1"
],
Expand Down Expand Up @@ -159,7 +159,7 @@
},
"apache.hive": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"hmsclient>=0.1.0",
"jmespath>=0.7.0",
Expand Down Expand Up @@ -201,7 +201,7 @@
},
"apache.impala": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"impyla>=0.18.0,<1.0"
],
Expand Down Expand Up @@ -265,7 +265,7 @@
},
"apache.pinot": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pinotdb>=5.1.0"
],
Expand Down Expand Up @@ -421,7 +421,7 @@
"deps": [
"apache-airflow>=2.8.0",
"more-itertools>=9.0.0",
"sqlparse>=0.4.2"
"sqlparse>=0.5.1"
],
"devel-deps": [],
"plugins": [],
Expand All @@ -434,7 +434,7 @@
"databricks": {
"deps": [
"aiohttp>=3.9.2, <4",
"apache-airflow-providers-common-sql>=1.10.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"databricks-sql-connector>=2.0.0, <3.0.0, !=2.9.0",
"mergedeep>=1.3.4",
Expand Down Expand Up @@ -545,7 +545,7 @@
},
"elasticsearch": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"elasticsearch>=8.10,<9"
],
Expand All @@ -559,7 +559,7 @@
},
"exasol": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand Down Expand Up @@ -632,7 +632,7 @@
"deps": [
"PyOpenSSL>=23.0.0",
"apache-airflow-providers-common-compat>=1.2.1",
"apache-airflow-providers-common-sql>=1.7.2",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"asgiref>=3.5.2",
"dill>=0.2.3",
Expand Down Expand Up @@ -787,7 +787,7 @@
},
"jdbc": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"jaydebeapi>=1.1.1"
],
Expand Down Expand Up @@ -855,7 +855,7 @@
},
"microsoft.mssql": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"methodtools>=0.4.7",
"pymssql>=2.3.0"
Expand Down Expand Up @@ -906,7 +906,7 @@
},
"mysql": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"mysql-connector-python>=8.0.29",
"mysqlclient>=1.4.0; sys_platform != 'darwin'"
Expand Down Expand Up @@ -937,7 +937,7 @@
},
"odbc": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pyodbc>=5.0.0"
],
Expand Down Expand Up @@ -973,7 +973,7 @@
"openlineage": {
"deps": [
"apache-airflow-providers-common-compat>=1.2.1",
"apache-airflow-providers-common-sql>=1.6.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"attrs>=22.2",
"openlineage-integration-common>=1.24.2",
Expand Down Expand Up @@ -1017,7 +1017,7 @@
},
"oracle": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"oracledb>=2.0.0"
],
Expand Down Expand Up @@ -1083,7 +1083,7 @@
},
"postgres": {
"deps": [
"apache-airflow-providers-common-sql>=1.17.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"psycopg2-binary>=2.9.4"
],
Expand All @@ -1099,7 +1099,7 @@
},
"presto": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand Down Expand Up @@ -1214,7 +1214,7 @@
},
"slack": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"slack_sdk>=3.19.0"
],
Expand All @@ -1239,7 +1239,7 @@
"snowflake": {
"deps": [
"apache-airflow-providers-common-compat>=1.1.0",
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand All @@ -1260,7 +1260,7 @@
},
"sqlite": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0"
],
"devel-deps": [],
Expand All @@ -1285,7 +1285,7 @@
},
"standard": {
"deps": [
"apache-airflow-providers-common-sql>=1.18.0",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0"
],
"devel-deps": [],
Expand Down Expand Up @@ -1318,7 +1318,7 @@
},
"teradata": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"teradatasql>=17.20.0.28",
"teradatasqlalchemy>=17.20.0.0"
Expand All @@ -1335,7 +1335,7 @@
},
"trino": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"pandas>=1.5.3,<2.2;python_version<\"3.9\"",
"pandas>=2.1.2,<2.2;python_version>=\"3.9\"",
Expand All @@ -1353,7 +1353,7 @@
},
"vertica": {
"deps": [
"apache-airflow-providers-common-sql>=1.3.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"vertica-python>=0.6.0"
],
Expand Down Expand Up @@ -1393,7 +1393,7 @@
},
"ydb": {
"deps": [
"apache-airflow-providers-common-sql>=1.14.1",
"apache-airflow-providers-common-sql>=1.20.0",
"apache-airflow>=2.8.0",
"ydb-dbapi>=0.1.0",
"ydb>=3.18.8"
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/amazon/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ versions:
dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-compat>=1.2.1
- apache-airflow-providers-common-sql>=1.3.1
- apache-airflow-providers-common-sql>=1.20.0
- apache-airflow-providers-http
# We should update minimum version of boto3 and here regularly to avoid `pip` backtracking with the number
# of candidates to consider. Make sure to configure boto3 version here as well as in all the tools below
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/drill/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- sqlalchemy-drill>=1.1.0

integrations:
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/druid/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- pydruid>=0.4.1

integrations:
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/hive/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.3.1
- apache-airflow-providers-common-sql>=1.20.0
- hmsclient>=0.1.0
# In pandas 2.2 minimal version of the sqlalchemy is 2.0
# https://pandas.pydata.org/docs/whatsnew/v2.2.0.html#increased-minimum-versions-for-dependencies
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/impala/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ versions:

dependencies:
- impyla>=0.18.0,<1.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- apache-airflow>=2.8.0

additional-extras:
Expand Down
2 changes: 1 addition & 1 deletion providers/src/airflow/providers/apache/pinot/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- apache-airflow-providers-common-sql>=1.14.1
- apache-airflow-providers-common-sql>=1.20.0
- pinotdb>=5.1.0

integrations:
Expand Down
15 changes: 12 additions & 3 deletions providers/src/airflow/providers/common/sql/hooks/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,8 @@ class DbApiHook(BaseHook):
conn_name_attr: str
# Override to have a default connection id for a particular dbHook
default_conn_name = "default_conn_id"
# Override if this db doesn't support semicolons in SQL queries
strip_semicolon = False
# Override if this db supports autocommit.
supports_autocommit = False
# Override if this db supports executemany.
Expand Down Expand Up @@ -369,14 +371,18 @@ def strip_sql_string(sql: str) -> str:
return sql.strip().rstrip(";")

@staticmethod
def split_sql_string(sql: str) -> list[str]:
def split_sql_string(sql: str, strip_semicolon: bool = False) -> list[str]:
"""
Split string into multiple SQL expressions.
:param sql: SQL string potentially consisting of multiple expressions
:param strip_semicolon: whether to strip semicolon from SQL string
:return: list of individual expressions
"""
splits = sqlparse.split(sqlparse.format(sql, strip_comments=True))
splits = sqlparse.split(
sql=sqlparse.format(sql, strip_comments=True),
strip_semicolon=strip_semicolon,
)
return [s for s in splits if s]

@property
Expand Down Expand Up @@ -471,7 +477,10 @@ def run(

if isinstance(sql, str):
if split_statements:
sql_list: Iterable[str] = self.split_sql_string(sql)
sql_list: Iterable[str] = self.split_sql_string(
sql=sql,
strip_semicolon=self.strip_semicolon,
)
else:
sql_list = [sql] if sql.strip() else []
else:
Expand Down
3 changes: 2 additions & 1 deletion providers/src/airflow/providers/common/sql/hooks/sql.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class ConnectorProtocol(Protocol):
class DbApiHook(BaseHook):
conn_name_attr: str
default_conn_name: str
strip_semicolon: bool
supports_autocommit: bool
supports_executemany: bool
connector: ConnectorProtocol | None
Expand Down Expand Up @@ -93,7 +94,7 @@ class DbApiHook(BaseHook):
@staticmethod
def strip_sql_string(sql: str) -> str: ...
@staticmethod
def split_sql_string(sql: str) -> list[str]: ...
def split_sql_string(sql: str, strip_semicolon: bool = False) -> list[str]: ...
@property
def last_description(self) -> Sequence[Sequence] | None: ...
@overload
Expand Down
3 changes: 2 additions & 1 deletion providers/src/airflow/providers/common/sql/provider.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ state: ready
source-date-epoch: 1730012422
# note that those versions are maintained by release manager - do not update them manually
versions:
- 1.20.0
- 1.19.0
- 1.18.0
- 1.17.1
Expand Down Expand Up @@ -64,7 +65,7 @@ versions:

dependencies:
- apache-airflow>=2.8.0
- sqlparse>=0.4.2
- sqlparse>=0.5.1
- more-itertools>=9.0.0

additional-extras:
Expand Down
Loading

0 comments on commit e7194df

Please sign in to comment.