From fb37c28c9b5e36e64b724d50d7b33c15d0967362 Mon Sep 17 00:00:00 2001 From: featzhang Date: Fri, 23 Dec 2022 00:19:14 +0800 Subject: [PATCH 1/3] [INLONG-7030][Manager][Sort] Build tool for local debugging environment --- .gitignore | 5 + inlong-tools/dev/README.md | 138 ++++++++++++++++++++++++ inlong-tools/dev/inlong-dev-toolkit.sh | 143 +++++++++++++++++++++++++ 3 files changed, 286 insertions(+) create mode 100644 inlong-tools/dev/README.md create mode 100755 inlong-tools/dev/inlong-dev-toolkit.sh diff --git a/.gitignore b/.gitignore index 4c0d2df5c8e..ebb9d3011c4 100644 --- a/.gitignore +++ b/.gitignore @@ -71,3 +71,8 @@ inlong-sdk/dataproxy-sdk-twins/dataproxy-sdk-cpp/release/bin/ # Docker mysql storage docker/docker-compose/mysql + +/inlong-sort/connectors +/plugins +/inlong-sort/sort-dist.jar +/inlong-manager/plugins \ No newline at end of file diff --git a/inlong-tools/dev/README.md b/inlong-tools/dev/README.md new file mode 100644 index 00000000000..95ba5a0ec77 --- /dev/null +++ b/inlong-tools/dev/README.md @@ -0,0 +1,138 @@ + + +## Apache InLong dev toolkit + +### Overview + +The Apache InLong project includes modules such as "Dashboard", "DataProxy", "Manager" and "TubeMQ". +When debugging in a development environment such as `Intellij IDEA`, +The environment is difficult to build. + +Take the `inlong-manager` as an example, it depends on too many configurations and packages. +When debugging locally in the IDE, multiple default directories need to be created, which is more complicated. +So a script is urgently needed to quickly support local debugging. + +Also, similar tools are required for local debugging of other modules. +Temporarily named `InLong dev toolkit`, looking forward to adding more features. + +### Use case + +#### Help + +```shell +❯ inlong-tools/dev/inlong-dev-toolkit.sh +#################################################################################### +# Welcome to use the Apache InLong dev toolkit! # +# @2022-12-23 20:38:34 # +#################################################################################### + + +inlong-tools/dev/inlong-dev-toolkit.sh help | h + :help + +inlong-tools/dev/inlong-dev-toolkit.sh manager | m + :build manager local debugging environment + +Have a nice day, bye! + +``` + +#### Manager + +Rely of Manager: + +| Directory/File | link target | Rely modules | +|--------------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------|---------------------------------| +| `./plugins` | `inlong-manager/manger-plugins/target/plugins` | `inlong-sort/sort-plugin` | +| `./inlong-sort/connectors` | - | `inlong-sort/sort-connectors/*` | | +| `./inlong-sort/connectors/sort-connector-${name}-${project.version}.jar` | `inlong-sort/sort-connectors/${connector.name}/target/sort-connector-${connector.name}-${project.version}.jar` | `inlong-sort/sort-connectors/*` | +| `./sort-dist.jar` | `sort-dist/target/sort-dist-${project.version}.jar` | `inlong-sort/sort-dist` | + +```shell +❯ inlong-tools/dev/inlong-dev-toolkit.sh m +Execute action: manager +# start build manager local debugging environment ... +current_version: 1.5.0-SNAPSHOT +associate plugins directory: inlong-manager/manager-plugins/target/plugins +associate sort dist: inlong-sort/sort-dist/target/sort-dist +recreate connector dir: inlong-sort/connectors +All connector names: +hive mysql-cdc kafka jdbc pulsar iceberg postgres-cdc mongodb-cdc sqlserver-cdc oracle-cdc elasticsearch-6 elasticsearch-7 redis tubemq filesystem doris starrocks hudi +associate connector: hive +associate connector: mysql-cdc +associate connector: kafka +associate connector: jdbc +associate connector: pulsar +associate connector: iceberg +associate connector: postgres-cdc +associate connector: mongodb-cdc +associate connector: sqlserver-cdc +associate connector: oracle-cdc +associate connector: elasticsearch-6 +associate connector: elasticsearch-7 +associate connector: redis +associate connector: tubemq +associate connector: filesystem +associate connector: doris +associate connector: starrocks +associate connector: hudi +build dev env of manager finished. +Have a nice day, bye! +``` + +### Development: add more function + +#### Public variables + +| inner variable | implication | +|--------------------|-----------------------------------| +| `script_dir` | the directory of this script file | +| `script_file_name` | the script file name | +| `base_dir` | the root directory of project | + +#### Step 1. implement function + +#### Step 2. register function + +Create a global array variable: + +- It is recommended to end with `_action`, such as `manager_action`. +- It must have 4 elements. +- The 1st element is the long opt command. +- The 2nd element is the sort opt command. +- The 3rd element is the details of the function. +- The 4th element is the function name, which implements in Step 1. + +Add the variable to the `actions` array, like: + +```shell +actions=( + help_action + manger_action +) +``` + +#### Step 3. ignore the temporary file + +Add temporary files to `.gitignore` + +> Notice: must base on project base directory diff --git a/inlong-tools/dev/inlong-dev-toolkit.sh b/inlong-tools/dev/inlong-dev-toolkit.sh new file mode 100755 index 00000000000..89b25395362 --- /dev/null +++ b/inlong-tools/dev/inlong-dev-toolkit.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# Initialize the configuration files of inlong components + +script_dir=$(dirname "$0") +script_file_name=$(basename "$0") + +# the absolute dir of project +base_dir=$( + cd $script_dir + cd ../.. + pwd +) + +help_action=( + 'help' + 'h' + 'help: show all actions' + 'welcome' +) + +manager_action=( + 'manager' + 'm' + 'build manager local debugging environment' + 'manager' +) + +actions=( + help_action + manager_action +) + +function welcome() { + local_date_time=$(date +"%Y-%m-%d %H:%M:%S") + echo '####################################################################################' + echo '# Welcome to use Apache InLong dev toolkit ! #' + echo '# @'$local_date_time' #' + echo '####################################################################################' + echo '' + + # shellcheck disable=SC2068 + for action in ${actions[@]}; do + # shellcheck disable=SC1087 + TMP=$action[@] + TempB=("${!TMP}") + name=${TempB[0]} + simple_name=${TempB[1]} + desc=${TempB[2]} + + echo $script_dir'/'$script_file_name' '$name' | '$simple_name + echo ' :'$desc + done + echo '' +} + +function manager() { + echo '# start build manager local debugging environment ...' + + project_version=$(mvn -q \ + -Dexec.executable=echo \ + -Dexec.args='${project.version}' \ + --non-recursive \ + exec:exec) + + echo 'current_version: '"$project_version" + # + echo 'associate plugins directory: inlong-manager/manager-plugins/target/plugins' + # plugins -> manager-plugins/target/plugins + cd "$base_dir" + rm -rf plugins + ln -s inlong-manager/manager-plugins/target/plugins plugins + # + echo 'associate sort dist: inlong-sort/sort-dist/target/sort-dist' + cd "$base_dir"/inlong-sort + rm -rf sort-dist.jar + ln -s sort-dist/target/sort-dist-"$project_version".jar sort-dist.jar + # inlong-manager: plugins -> manager-plugins/target/plugins + cd "$base_dir"/inlong-manager + rm -rf plugins + ln -s manager-plugins/target/plugins plugins + # mkdir inlong-sort/connectors + sort_connector_dir=$base_dir/inlong-sort/connectors + echo 'recreate connector dir: '"$sort_connector_dir" + # shellcheck disable=SC2086 + rm -rf $sort_connector_dir + mkdir "$sort_connector_dir" + cd "$sort_connector_dir" + connector_names=$(grep '' "$base_dir"/inlong-sort/sort-connectors/pom.xml | sed 's///g' | sed 's/<\/module>//g' | grep -v base) + + echo 'All connector names: ' + echo $connector_names | tr -d '\n' + + for connector_name in $connector_names; do + echo 'associate connector: '"$connector_name" + connector_suffix_name=$(echo "$connector_name" | sed 's/elasticsearch-6/elasticsearch6/g' | sed 's/elasticsearch-7/elasticsearch7/g') + ln -s ../sort-connectors/"${connector_name}"/target/sort-connector-"${connector_suffix_name}"-"$project_version".jar sort-connector-"${connector_name}".jar + done + + echo 'build dev env of manager finished .' +} + +function main() { + action=$1 + + if [ ! -n "$action" ]; then + welcome + fi + + # shellcheck disable=SC2068 + for one_action in ${actions[@]}; do + # shellcheck disable=SC1087 + TMP=$one_action[@] + TempB=("${!TMP}") + name=${TempB[0]} + simple_name=${TempB[1]} + function_name=${TempB[3]} + desc=${TempB[4]} + + if [[ X"$name" == X"$action" ]] || [[ X"$simple_name" == X"$action" ]]; then + echo 'Execute action: '"$function_name" + $function_name + fi + done + + echo 'Have a nice day, bye!' +} + +main $1 From 416016a11526605521bf1f80cc24af13565ff078 Mon Sep 17 00:00:00 2001 From: featzhang Date: Tue, 27 Dec 2022 23:21:21 +0800 Subject: [PATCH 2/3] [INLONG-7072][Manager][Sort] Resource adaptive adjustment for Hudi --- inlong-dashboard/src/locales/cn.json | 2 ++ inlong-dashboard/src/locales/en.json | 2 ++ inlong-dashboard/src/metas/sinks/defaults/Hudi.ts | 13 +++++++++++++ 3 files changed, 17 insertions(+) diff --git a/inlong-dashboard/src/locales/cn.json b/inlong-dashboard/src/locales/cn.json index e1d316d4f49..daa35f63574 100644 --- a/inlong-dashboard/src/locales/cn.json +++ b/inlong-dashboard/src/locales/cn.json @@ -209,6 +209,8 @@ "meta.Sinks.Hudi.PartitionFieldListHelp": "字段类型若为timestamp,则必须设置此字段值的格式,支持 MICROSECONDS,MILLISECONDS,SECONDS,SQL,ISO_8601,以及自定义,比如:yyyy-MM-dd HH:mm:ss 等", "meta.Sinks.Hudi.FieldFormat": "字段格式", "meta.Sinks.Hudi.ExtListHelper": "hudi表的DDL属性需带前缀'ddl.'", + "meta.Sinks.Hudi.SourceRecordUnit": "条", + "meta.Sinks.Hudi.SourceRecordPreDay": "天数据量", "meta.Sinks.Greenplum.TableName": "表名称", "meta.Sinks.Greenplum.PrimaryKey": "主键", "meta.Sinks.Greenplum.FieldName": "字段名", diff --git a/inlong-dashboard/src/locales/en.json b/inlong-dashboard/src/locales/en.json index d6698660463..85dc67f621a 100644 --- a/inlong-dashboard/src/locales/en.json +++ b/inlong-dashboard/src/locales/en.json @@ -209,6 +209,8 @@ "meta.Sinks.Hudi.PartitionFieldListHelp": "If the field type is timestamp, you must set the format of the field value, support MICROSECONDS, MILLISECONDS, SECONDS, SQL, ISO_8601, and custom, such as: yyyy-MM-dd HH:mm:ss, etc.", "meta.Sinks.Hudi.FieldFormat": "FieldFormat", "meta.Sinks.Hudi.ExtListHelper": "The DDL attribute of the hudi table needs to be prefixed with 'ddl.'", + "meta.Sinks.Hudi.SourceRecordUnit": "row", + "meta.Sinks.Hudi.SourceRecordPreDay": "RecordPreDay", "meta.Sinks.Greenplum.TableName": "TableName", "meta.Sinks.Greenplum.PrimaryKey": "PrimaryKey", "meta.Sinks.Greenplum.FieldName": "FieldName", diff --git a/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts b/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts index ad4648ac247..c627557d80b 100644 --- a/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts +++ b/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts @@ -317,6 +317,19 @@ export default class HudiSink extends SinkInfo implements DataWithBackend, Rende @ColumnDecorator() @I18n('meta.Sinks.Hudi.PrimaryKey') primaryKey: string; + + @FieldDecorator({ + type: 'inputnumber', + initialValue: 1, + props: values => ({ + disabled: [110, 130].includes(values?.status), + min: 1, + }), + rules: [{ required: true }], + suffix: i18n.t('meta.Sinks.Hudi.RecordUnit'), + }) + @I18n('meta.Sinks.Hudi.RecordPreDay') + recordPreDay: number; } const getFieldListColumns = sinkValues => { From e930232a7a20fee65cb8b7c66f835157344b84ac Mon Sep 17 00:00:00 2001 From: featzhang Date: Wed, 28 Dec 2022 00:12:17 +0800 Subject: [PATCH 3/3] [Manager][Sort] Resource adaptive adjustment for Hudi --- inlong-dashboard/src/locales/cn.json | 6 ++++-- inlong-dashboard/src/locales/en.json | 6 ++++-- .../src/metas/sinks/defaults/Hudi.ts | 17 +++++++++++++++-- .../plugin/flink/IntegrationTaskRunner.java | 1 + .../sort/protocol/node/load/HudiLoadNode.java | 1 + 5 files changed, 25 insertions(+), 6 deletions(-) diff --git a/inlong-dashboard/src/locales/cn.json b/inlong-dashboard/src/locales/cn.json index daa35f63574..a156c65e3d2 100644 --- a/inlong-dashboard/src/locales/cn.json +++ b/inlong-dashboard/src/locales/cn.json @@ -209,8 +209,10 @@ "meta.Sinks.Hudi.PartitionFieldListHelp": "字段类型若为timestamp,则必须设置此字段值的格式,支持 MICROSECONDS,MILLISECONDS,SECONDS,SQL,ISO_8601,以及自定义,比如:yyyy-MM-dd HH:mm:ss 等", "meta.Sinks.Hudi.FieldFormat": "字段格式", "meta.Sinks.Hudi.ExtListHelper": "hudi表的DDL属性需带前缀'ddl.'", - "meta.Sinks.Hudi.SourceRecordUnit": "条", - "meta.Sinks.Hudi.SourceRecordPreDay": "天数据量", + "meta.Sinks.Hudi.CapacityPerRowUnit": "KB", + "meta.Sinks.Hudi.CapacityPerRow": "每条大小", + "meta.Sinks.Hudi.RecordPreDay": "天数据量", + "meta.Sinks.Hudi.RecordPreDayUnit": "条", "meta.Sinks.Greenplum.TableName": "表名称", "meta.Sinks.Greenplum.PrimaryKey": "主键", "meta.Sinks.Greenplum.FieldName": "字段名", diff --git a/inlong-dashboard/src/locales/en.json b/inlong-dashboard/src/locales/en.json index 85dc67f621a..7a5160b8b8e 100644 --- a/inlong-dashboard/src/locales/en.json +++ b/inlong-dashboard/src/locales/en.json @@ -209,8 +209,10 @@ "meta.Sinks.Hudi.PartitionFieldListHelp": "If the field type is timestamp, you must set the format of the field value, support MICROSECONDS, MILLISECONDS, SECONDS, SQL, ISO_8601, and custom, such as: yyyy-MM-dd HH:mm:ss, etc.", "meta.Sinks.Hudi.FieldFormat": "FieldFormat", "meta.Sinks.Hudi.ExtListHelper": "The DDL attribute of the hudi table needs to be prefixed with 'ddl.'", - "meta.Sinks.Hudi.SourceRecordUnit": "row", - "meta.Sinks.Hudi.SourceRecordPreDay": "RecordPreDay", + "meta.Sinks.Hudi.RecordPreDayUnit": "row", + "meta.Sinks.Hudi.RecordPreDay": "RecordPreDay", + "meta.Sinks.Hudi.CapacityPerRowUnit": "KB", + "meta.Sinks.Hudi.CapacityPerRow": "CapacityPerRow", "meta.Sinks.Greenplum.TableName": "TableName", "meta.Sinks.Greenplum.PrimaryKey": "PrimaryKey", "meta.Sinks.Greenplum.FieldName": "FieldName", diff --git a/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts b/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts index c627557d80b..3ae8520c75e 100644 --- a/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts +++ b/inlong-dashboard/src/metas/sinks/defaults/Hudi.ts @@ -320,16 +320,29 @@ export default class HudiSink extends SinkInfo implements DataWithBackend, Rende @FieldDecorator({ type: 'inputnumber', - initialValue: 1, + initialValue: 100000, props: values => ({ disabled: [110, 130].includes(values?.status), min: 1, }), rules: [{ required: true }], - suffix: i18n.t('meta.Sinks.Hudi.RecordUnit'), + suffix: i18n.t('meta.Sinks.Hudi.RecordPreDayUnit'), }) @I18n('meta.Sinks.Hudi.RecordPreDay') recordPreDay: number; + + @FieldDecorator({ + type: 'inputnumber', + initialValue: 10, + props: values => ({ + disabled: [110, 130].includes(values?.status), + min: 1, + }), + rules: [{ required: true }], + suffix: i18n.t('meta.Sinks.Hudi.CapacityPerRowUnit'), + }) + @I18n('meta.Sinks.Hudi.CapacityPerRow') + capacityPerRow: number; } const getFieldListColumns = sinkValues => { diff --git a/inlong-manager/manager-plugins/src/main/java/org/apache/inlong/manager/plugin/flink/IntegrationTaskRunner.java b/inlong-manager/manager-plugins/src/main/java/org/apache/inlong/manager/plugin/flink/IntegrationTaskRunner.java index 5d1541549bd..6193818f7a0 100644 --- a/inlong-manager/manager-plugins/src/main/java/org/apache/inlong/manager/plugin/flink/IntegrationTaskRunner.java +++ b/inlong-manager/manager-plugins/src/main/java/org/apache/inlong/manager/plugin/flink/IntegrationTaskRunner.java @@ -39,6 +39,7 @@ public class IntegrationTaskRunner implements Runnable { private final FlinkInfo flinkInfo; private final Integer commitType; + // TODO: add job resource required public IntegrationTaskRunner(FlinkService flinkService, FlinkInfo flinkInfo, Integer commitType) { this.flinkService = flinkService; this.flinkInfo = flinkInfo; diff --git a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/HudiLoadNode.java b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/HudiLoadNode.java index b1448a82bc3..6bfe78b2e03 100644 --- a/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/HudiLoadNode.java +++ b/inlong-sort/sort-common/src/main/java/org/apache/inlong/sort/protocol/node/load/HudiLoadNode.java @@ -165,6 +165,7 @@ public Map tableOptions() { options.put(HUDI_OPTION_RECORD_KEY_FIELD_NAME, primaryKey); options.put("connector", "hudi-inlong"); + // TODO adoptive of hudi operator partitions return options; }