Skip to content

Commit

Permalink
[Coral-Trino] Avoid accidental translation of Trino from_unixtime S…
Browse files Browse the repository at this point in the history
…QL call (#467)

Co-authored-by: Walaa Eldin Moustafa <wmoustafa@linkedin.com>
  • Loading branch information
findinpath and wmoustafa authored Oct 23, 2023
1 parent 761b209 commit d53af31
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import com.linkedin.coral.common.functions.SameOperandTypeExceptFirstOperandChecker;

import static com.linkedin.coral.hive.hive2rel.functions.CoalesceStructUtility.*;
import static com.linkedin.coral.hive.hive2rel.functions.TimestampFromUnixtime.TIMESTAMP_FROM_UNIXTIME;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.*;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.*;
import static org.apache.calcite.sql.type.OperandTypes.*;
Expand Down Expand Up @@ -320,6 +321,7 @@ public boolean isOptional(int i) {
// Date Functions
createAddUserDefinedFunction("from_unixtime", FunctionReturnTypes.STRING,
family(ImmutableList.of(SqlTypeFamily.NUMERIC, SqlTypeFamily.STRING), optionalOrd(1)));
addFunctionEntry("timestamp_from_unixtime", TIMESTAMP_FROM_UNIXTIME);
createAddUserDefinedFunction("unix_timestamp", BIGINT,
family(ImmutableList.of(SqlTypeFamily.STRING, SqlTypeFamily.STRING), optionalOrd(ImmutableList.of(0, 1))));
createAddUserDefinedFunction("to_date", FunctionReturnTypes.STRING, or(STRING, DATETIME));
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/**
* Copyright 2023 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
package com.linkedin.coral.hive.hive2rel.functions;

import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlCallBinding;
import org.apache.calcite.sql.SqlFunction;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlLiteral;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlOperandCountRange;
import org.apache.calcite.sql.SqlUtil;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.parser.SqlParserPos;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.SqlOperandCountRanges;
import org.apache.calcite.sql.type.SqlTypeFamily;
import org.apache.calcite.sql.type.SqlTypeName;

import static org.apache.calcite.sql.type.ReturnTypes.explicit;


/**
 * Calcite {@link SqlFunction} registered under the internal name {@code timestamp_from_unixtime}
 * with an explicit {@code TIMESTAMP} return type.
 *
 * <p>The operator carries a name distinct from {@code from_unixtime}, but {@link #unparse} always
 * writes it out as a call to {@code from_unixtime}.
 *
 * <p>Accepted operand shapes (see {@link #checkOperandTypes}):
 * <ul>
 *   <li>{@code (NUMERIC)}</li>
 *   <li>{@code (NUMERIC, STRING)}</li>
 *   <li>{@code (NUMERIC, NUMERIC, NUMERIC)}</li>
 * </ul>
 * NOTE(review): these shapes look like Trino's {@code from_unixtime} overloads
 * (unixtime / unixtime + zone / unixtime + hours + minutes) - confirm against the Trino docs.
 */
public class TimestampFromUnixtime extends SqlFunction {

  /** Shared singleton instance; the constructor is private. */
  public static final TimestampFromUnixtime TIMESTAMP_FROM_UNIXTIME = new TimestampFromUnixtime();

  private TimestampFromUnixtime() {
    // Operand type inference and the operand type checker are passed as null;
    // validation is done by the checkOperandTypes override below.
    super(new SqlIdentifier("timestamp_from_unixtime", SqlParserPos.ZERO), explicit(SqlTypeName.TIMESTAMP), null, null,
        null, SqlFunctionCategory.TIMEDATE);
  }

  /**
   * @return an operand count range of one to three operands, inclusive
   */
  @Override
  public SqlOperandCountRange getOperandCountRange() {
    return SqlOperandCountRanges.between(1, 3);
  }

  /**
   * Validates the operands of a call against the accepted shapes listed on the class.
   *
   * @param callBinding    binding of the call being validated
   * @param throwOnFailure forwarded to the underlying family checkers, which decide whether a
   *                       mismatch is reported by exception or by a {@code false} result
   * @return {@code true} when every operand belongs to the expected type family,
   *         {@code false} otherwise
   */
  @Override
  public boolean checkOperandTypes(SqlCallBinding callBinding, boolean throwOnFailure) {
    // The first operand is numeric in every accepted shape.
    if (!operandHasFamily(callBinding, 0, SqlTypeFamily.NUMERIC, throwOnFailure)) {
      return false;
    }

    final int operandCount = callBinding.getOperandCount();

    // Two-operand form: the second operand is a string.
    if (operandCount == 2 && !operandHasFamily(callBinding, 1, SqlTypeFamily.STRING, throwOnFailure)) {
      return false;
    }

    // Three-operand form: the second and third operands are numeric.
    if (operandCount == 3) {
      if (!operandHasFamily(callBinding, 1, SqlTypeFamily.NUMERIC, throwOnFailure)) {
        return false;
      }
      if (!operandHasFamily(callBinding, 2, SqlTypeFamily.NUMERIC, throwOnFailure)) {
        return false;
      }
    }

    // All applicable checks passed. The previous implementation returned false here,
    // which rejected every call, including correctly typed ones.
    return true;
  }

  /**
   * Writes the call as {@code from_unixtime(<operands>)}: the function identifier, an optional
   * function quantifier, then the comma-separated operands.
   */
  @Override
  public void unparse(SqlWriter writer, SqlCall call, int leftPrec, int rightPrec) {
    // Emit the target function name instead of this operator's internal name.
    SqlUtil.unparseSqlIdentifierSyntax(writer, new SqlIdentifier("from_unixtime", SqlParserPos.ZERO), true);
    final SqlWriter.Frame frame = writer.startList(SqlWriter.FrameTypeEnum.FUN_CALL, "(", ")");
    final SqlLiteral quantifier = call.getFunctionQuantifier();
    if (quantifier != null) {
      quantifier.unparse(writer, 0, 0);
    }
    for (SqlNode operand : call.getOperandList()) {
      writer.sep(",");
      operand.unparse(writer, 0, 0);
    }
    writer.endList(frame);
  }

  /** Checks that the operand at {@code ordinal} belongs to the given type family. */
  private static boolean operandHasFamily(SqlCallBinding callBinding, int ordinal, SqlTypeFamily family,
      boolean throwOnFailure) {
    // A one-family checker is built per operand, so the formal operand index is always 0.
    return OperandTypes.family(family).checkSingleOperandType(callBinding, callBinding.operand(ordinal), 0,
        throwOnFailure);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.parser.SqlParserPos;

import static com.linkedin.coral.hive.hive2rel.functions.TimestampFromUnixtime.TIMESTAMP_FROM_UNIXTIME;


public class TrinoSqlDialect extends SqlDialect {
private static final String IDENTIFIER_QUOTE_STRING = "\"";
Expand Down Expand Up @@ -74,7 +76,11 @@ public void unparseCall(SqlWriter writer, SqlCall call, int leftPrec, int rightP
unparseMapValueConstructor(writer, call, leftPrec, rightPrec);
break;
default:
super.unparseCall(writer, call, leftPrec, rightPrec);
if (call.getOperator().getName().equals("timestamp_from_unixtime")) {
TIMESTAMP_FROM_UNIXTIME.unparse(writer, call, leftPrec, rightPrec);
} else {
super.unparseCall(writer, call, leftPrec, rightPrec);
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public class FromUtcTimestampOperatorTransformer extends SqlCallTransformer {
private static final String WITH_TIMEZONE = "with_timezone";
private static final String TO_UNIXTIME = "to_unixtime";
private static final String FROM_UNIXTIME_NANOS = "from_unixtime_nanos";
private static final String FROM_UNIXTIME = "from_unixtime";
private static final String TIMESTAMP_FROM_UNIXTIME = "timestamp_from_unixtime";
private static final String CANONICALIZE_HIVE_TIMEZONE_ID = "$canonicalize_hive_timezone_id";

public FromUtcTimestampOperatorTransformer(TypeDerivationUtil typeDerivationUtil) {
Expand Down Expand Up @@ -86,7 +86,7 @@ protected SqlCall transform(SqlCall sqlCall) {
SqlOperator trinoFromUnixtimeNanos =
createSqlOperator(FROM_UNIXTIME_NANOS, explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoFromUnixTime =
createSqlOperator(FROM_UNIXTIME, explicit(TIMESTAMP /* should be WITH TIME ZONE */));
createSqlOperator(TIMESTAMP_FROM_UNIXTIME, explicit(TIMESTAMP /* should be WITH TIME ZONE */));
SqlOperator trinoCanonicalizeHiveTimezoneId = createSqlOperator(CANONICALIZE_HIVE_TIMEZONE_ID, explicit(VARCHAR));

SqlCall canonicalizeHiveTimezoneIdSqlCall = trinoCanonicalizeHiveTimezoneId.createCall(ZERO, timezone);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ public Object[][] viewTestCasesProvider() {
{ "test", "get_json_object_view", "SELECT \"json_extract\"(\"tablea\".\"b\".\"b1\", '$.name')\n"
+ "FROM \"test\".\"tablea\" AS \"tablea\"" },

{ "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 
CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n"
{ "test", "view_from_utc_timestamp", "SELECT CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_tinyint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_smallint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_integer\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime_nanos\"(CAST(\"table_from_utc_timestamp\".\"a_bigint\" AS BIGINT) * 1000000), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_float\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_double\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp\".\"a_decimal_zero\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3))\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp\"" },

{ "test", "date_calculation_view", "SELECT \"date\"(CAST(\"substr\"('2021-08-20', 1, 10) AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP)), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20' AS TIMESTAMP))), \"date_add\"('day', 1 * -1, \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-21' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19' AS TIMESTAMP)), \"date\"(CAST('2021-08-20' AS TIMESTAMP))) AS INTEGER), CAST(\"date_diff\"('day', \"date\"(CAST('2021-08-19 23:59:59' AS TIMESTAMP)), \"date\"(CAST('2021-08-20 00:00:00' AS TIMESTAMP))) AS INTEGER)\n"
Expand Down Expand Up @@ -560,7 +560,7 @@ public void testCastNestedTimestampToDecimal() {
relNode = TestUtils.getHiveToRelConverter().convertSql(
"SELECT CAST(from_utc_timestamp(a_date, 'America/Los_Angeles') AS DECIMAL(10, 0)) AS d\nFROM test.table_from_utc_timestamp");
targetSql =
"SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp0\".\"a_date\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss'), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n"
"SELECT CAST(\"to_unixtime\"(\"with_timezone\"(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp0\".\"a_date\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('America/Los_Angeles')) AS TIMESTAMP(3)), 'UTC')) AS DECIMAL(10, 0)) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\"";
expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);
Expand All @@ -581,15 +581,15 @@ public void testSubstrWithTimestampOperator() {
relNode = TestUtils.getHiveToRelConverter().convertSql(
"SELECT substring(from_utc_timestamp(a_decimal_three,'PST'),1,10) AS d\nFROM test.table_from_utc_timestamp");
targetSql =
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(CAST(\"format_datetime\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp0\".\"a_decimal_three\" AS DOUBLE)), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(\"from_unixtime\"(CAST(\"table_from_utc_timestamp0\".\"a_decimal_three\" AS DOUBLE)), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp0\"";
expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);

relNode = TestUtils.getHiveToRelConverter().convertSql(
"SELECT substring(from_utc_timestamp(a_timestamp,'PST'),1,10) AS d\nFROM test.table_from_utc_timestamp");
targetSql =
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(CAST(\"format_datetime\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp1\".\"a_timestamp\", 'UTC'))), 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
"SELECT \"substr\"(CAST(CAST(\"at_timezone\"(\"from_unixtime\"(\"to_unixtime\"(\"with_timezone\"(\"table_from_utc_timestamp1\".\"a_timestamp\", 'UTC'))), \"$canonicalize_hive_timezone_id\"('PST')) AS TIMESTAMP(3)) AS VARCHAR(65535)), 1, 10) AS \"d\"\n"
+ "FROM \"test\".\"table_from_utc_timestamp\" AS \"table_from_utc_timestamp1\"";
expandedSql = relToTrinoConverter.convert(relNode);
assertEquals(expandedSql, targetSql);
Expand Down

0 comments on commit d53af31

Please sign in to comment.