Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

[Coral-Trino] Fix substr start index issue #434

Merged
merged 4 commits on
Dec 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ derby.log
ligradle
.DS_Store
*.patch
*/metastore_db
*/metastore_db
*.pyc
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public CoralToTrinoSqlCallConverter(Map<String, Boolean> configs) {
new CoralRegistryOperatorRenameSqlCallTransformer("nvl", 2, "coalesce"),
// array and map functions
new MapValueConstructorTransformer(),
new OperatorRenameSqlCallTransformer(SqlStdOperatorTable.SUBSTRING, 3, "SUBSTR"),

new SourceOperatorMatchSqlCallTransformer("item", 2) {
@Override
protected SqlCall transform(SqlCall sqlCall) {
Expand All @@ -72,7 +72,16 @@ protected SqlCall transform(SqlCall sqlCall) {
"{\"op\":\"/\",\"operands\":[{\"input\":0},{\"op\":\"^\",\"operands\":[{\"value\":10},{\"input\":2}]}]}",
null),
// string functions
new OperatorRenameSqlCallTransformer(SqlStdOperatorTable.SUBSTRING, 2, "SUBSTR"),
new JsonTransformSqlCallTransformer(SqlStdOperatorTable.SUBSTRING, 2, "substr",
"[{\"input\": 1}, {\"op\": \"+\", \"operands\": [{\"input\": 2}, {\"value\": 1}]}]", null, null),
new JsonTransformSqlCallTransformer(SqlStdOperatorTable.SUBSTRING, 3, "substr",
"[{\"input\": 1}, {\"op\": \"+\", \"operands\": [{\"input\": 2}, {\"value\": 1}]}, {\"input\": 3}]", null,
null),
new JsonTransformSqlCallTransformer(hiveToCoralSqlOperator("substr"), 2, "substr",
"[{\"input\": 1}, {\"op\": \"+\", \"operands\": [{\"input\": 2}, {\"value\": 1}]}]", null, null),
new JsonTransformSqlCallTransformer(hiveToCoralSqlOperator("substr"), 3, "substr",
jerryleooo marked this conversation as resolved.
Show resolved Hide resolved
"[{\"input\": 1}, {\"op\": \"+\", \"operands\": [{\"input\": 2}, {\"value\": 1}]}, {\"input\": 3}]", null,
null),
// JSON functions
new CoralRegistryOperatorRenameSqlCallTransformer("get_json_object", 2, "json_extract"),
// map various hive functions
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ public Object[][] viewTestCasesProvider() {
{ "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 
CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp\"" },

{ "test", "date_calculation_view", "SELECT \"date\"(CAST(\"substr\"('2021-08-20', 1, 10) AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP)), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-21' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19 23:59:59' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))) AS INTEGER)\n"
jerryleooo marked this conversation as resolved.
Show resolved Hide resolved
{ "test", "date_calculation_view", "SELECT \"date\"(CAST(\"substr\"('2021-08-20', 1 + 1, 10) AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP)), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-21' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19 23:59:59' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))) AS INTEGER)\n"
+ "FROM \"test\".\"tablea\" AS \"tablea\"" },

{ "test", "pmod_view", "SELECT MOD(MOD(- 9, 4) + 4, 4)\n" + "FROM \"test\".\"tablea\" AS \"tablea\"" },
Expand Down Expand Up @@ -306,7 +306,7 @@ public void testLateralViewOuterPosExplodeWithAlias() {
public void testAvoidTransformToDate() {
RelNode relNode = TestUtils.getHiveToRelConverter()
.convertSql("SELECT to_date(substr('2021-08-20', 1, 10)), to_date('2021-08-20')" + "FROM test.tableA");
String targetSql = "SELECT \"to_date\"(\"substr\"('2021-08-20', 1, 10)), \"to_date\"('2021-08-20')\n"
String targetSql = "SELECT \"to_date\"(\"substr\"('2021-08-20', 1 + 1, 10)), \"to_date\"('2021-08-20')\n"
+ "FROM \"test\".\"tablea\" AS \"tablea\"";

RelToTrinoConverter relToTrinoConverter =
Expand Down Expand Up @@ -603,15 +603,15 @@ public void testSubstrWithTimestamp() {
RelNode relNode = TestUtils.getHiveToRelConverter()
.convertSql("SELECT SUBSTR(a_timestamp, 12, 8) AS d\nFROM test.table_from_utc_timestamp");
String targetSql =
"SELECT \"substr\"(CAST(\"table_from_utc_timestamp\".\"a_timestamp\" AS VARCHAR(65535)), 12, 8) AS \"d\"\n"
"SELECT \"substr\"(CAST(\"table_from_utc_timestamp\".\"a_timestamp\" AS VARCHAR(65535)), 12 + 1, 8) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp\"";
String expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);

relNode = TestUtils.getHiveToRelConverter()
.convertSql("SELECT SUBSTRING(a_timestamp, 12, 8) AS d\nFROM test.table_from_utc_timestamp");
targetSql =
"SELECT \"substr\"(CAST(\"table_from_utc_timestamp0\".\"a_timestamp\" AS VARCHAR(65535)), 12, 8) AS \"d\"\n"
"SELECT \"substr\"(CAST(\"table_from_utc_timestamp0\".\"a_timestamp\" AS VARCHAR(65535)), 12 + 1, 8) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\"";
expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);
Expand All @@ -624,8 +624,8 @@ public void testAliasOrderBy() {
RelNode relNode = TestUtils.getHiveToRelConverter()
.convertSql("SELECT a, SUBSTR(b, 1, 1) AS aliased_column, c FROM test.tabler ORDER BY aliased_column DESC");
String targetSql =
"SELECT \"tabler\".\"a\" AS \"a\", \"substr\"(\"tabler\".\"b\", 1, 1) AS \"aliased_column\", \"tabler\".\"c\" AS \"c\"\n"
+ "FROM \"test\".\"tabler\" AS \"tabler\"\n" + "ORDER BY \"substr\"(\"tabler\".\"b\", 1, 1) DESC";
"SELECT \"tabler\".\"a\" AS \"a\", \"substr\"(\"tabler\".\"b\", 1 + 1, 1) AS \"aliased_column\", \"tabler\".\"c\" AS \"c\"\n"
+ "FROM \"test\".\"tabler\" AS \"tabler\"\n" + "ORDER BY \"substr\"(\"tabler\".\"b\", 1 + 1, 1) DESC";
String expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);
}
Expand All @@ -636,9 +636,10 @@ public void testAliasHaving() {

RelNode relNode = TestUtils.getHiveToRelConverter().convertSql(
"SELECT a, SUBSTR(b, 1, 1) AS aliased_column FROM test.tabler GROUP BY a, b HAVING aliased_column in ('dummy_value')");
String targetSql = "SELECT \"tabler\".\"a\" AS \"a\", \"substr\"(\"tabler\".\"b\", 1, 1) AS \"aliased_column\"\n"
+ "FROM \"test\".\"tabler\" AS \"tabler\"\n" + "GROUP BY \"tabler\".\"a\", \"tabler\".\"b\"\n"
+ "HAVING \"substr\"(\"tabler\".\"b\", 1, 1)\n" + "IN ('dummy_value')";
String targetSql =
"SELECT \"tabler\".\"a\" AS \"a\", \"substr\"(\"tabler\".\"b\", 1 + 1, 1) AS \"aliased_column\"\n"
+ "FROM \"test\".\"tabler\" AS \"tabler\"\n" + "GROUP BY \"tabler\".\"a\", \"tabler\".\"b\"\n"
+ "HAVING \"substr\"(\"tabler\".\"b\", 1 + 1, 1)\n" + "IN ('dummy_value')";
String expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -357,16 +357,33 @@ public void testTruncate() {
}

@Test
public void testSubString2() {
public void testSubStr2() {
String sql = "SELECT SUBSTR(scol, 1) FROM test.tableOne";
String expectedSql = "SELECT \"substr\"(\"tableone\".\"scol\", 1)\n" + "FROM \"test\".\"tableone\" AS \"tableone\"";
String expectedSql =
"SELECT \"substr\"(\"tableone\".\"scol\", 1 + 1)\n" + "FROM \"test\".\"tableone\" AS \"tableone\"";
testConversion(sql, expectedSql);
}

@Test
public void testSubString3() {
public void testSubString2() {
String sql = "SELECT SUBSTRING(scol, 1) FROM test.tableOne";
String expectedSql =
"SELECT \"substr\"(\"tableone\".\"scol\", 1 + 1)\n" + "FROM \"test\".\"tableone\" AS \"tableone\"";
testConversion(sql, expectedSql);
}

@Test
public void testSubStr3() {
String sql = "SELECT SUBSTR(scol, icol, 3) FROM test.tableOne";
String expectedSql = "SELECT \"substr\"(\"tableone\".\"scol\", \"tableone\".\"icol\", 3)\n"
String expectedSql = "SELECT \"substr\"(\"tableone\".\"scol\", \"tableone\".\"icol\" + 1, 3)\n"
+ "FROM \"test\".\"tableone\" AS \"tableone\"";
testConversion(sql, expectedSql);
}

/**
 * Three-argument {@code SUBSTRING} case: the expected SQL renders the call as
 * {@code "substr"} with {@code + 1} appended to the start-index operand, while the
 * string operand and the length operand are left as written.
 *
 * <p>The input and expected strings are handed to {@code testConversion}, which is
 * defined outside this method (presumably it converts the input and asserts equality
 * with the expected SQL; confirm against its definition).
 */
@Test
public void testSubString3() {
final String inputSql = "SELECT SUBSTRING(scol, icol, 3) FROM test.tableOne";
// Expected output is assembled from its two lines: the projection, then the FROM clause.
final String selectClause = "SELECT \"substr\"(\"tableone\".\"scol\", \"tableone\".\"icol\" + 1, 3)\n";
final String fromClause = "FROM \"test\".\"tableone\" AS \"tableone\"";
testConversion(inputSql, selectClause + fromClause);
}
Expand Down
Loading