
Commit e2ef3ad

Hisoka-X, cason0126, TyrantLucifer, hailin0, and sunxiaojian authored
Merge 2.3.3 dev to business-dev (apache#292)
* [Feature][Connector V2] expose configurable options in Cassandra (apache#3681)
* [Connector-V2][Paimon] Introduce paimon connector (apache#4178)
* [Improve][Zeta] Improve Zeta operation max count and ignore NPE (apache#4787)
  * [Improve][Zeta] Improve Zeta operation max count and ignore NPE
  * [Improve][Zeta] Improve Zeta operation max count and ignore NPE
* [Improve][Zeta] Cancel pipeline add retry to avoid cancel failed. (apache#4792)
* [Hotfix][CDC] Fix chunk start/end parameter type error (apache#4777)
  Incorrect wrapping as Array<Array> types, but only Array type required
* [Feature][Zeta] Add OSS support for Imap storage to cluster-mode type (apache#4683)
  * Add OSS/S3 to cluster-mode type apache#4621
  * fixed bug & add e2e test
  * Wait for the node to start before scheduling & Move jar to parent pom & optimize writer
  * update LICENSE
* [Hotfix][CI] Fix error repository name in ci config files (apache#4795)
* [Feature][Json-format] support read format for pulsar (apache#4111)
* [Improve][Connector-V2][Jdbc-Sink][Doc] Add the generate sink sql par… (apache#4797)
  * [Improve][Connector-V2][Jdbc-Sink][Doc] Add the generate sink sql parameter for the jdbc sink document
* [Docs][Connector-V2][Mysql] fix Mysql sink format doc (apache#4800)
* [Hotfix][Connector][Jdbc] Fix sqlserver system table case sensitivity (apache#4806)
* [Hotfix][Connector][Jdbc] Fix reconnect throw close statement exception (apache#4801)
* [Hotfix][Connector-V2][Jdbc] Fix the error of extracting primary key column in sink (apache#4815)
* [Feature][Connector-v2] Add Snowflake Source&Sink connector (apache#4470)
  ---------
  Co-authored-by: Eric <[email protected]>
  Co-authored-by: hailin0 <[email protected]>
* [Hotfix][CI] Fix redundant modules run e2e tests when change jdbc module (apache#4824)
* fix pom.xml code style (apache#4836)
* [Chore] Format the .conf file using the same style (apache#4830)
* [Hotfix][Zeta] Fix cpu load problem (apache#4828)
* [Improve][Zeta] Reduce the number of IMAPs used by checkpointIdCounter (apache#4832)
* [Bugfix][connector-v2][rabbitmq] Fix reduplicate ack msg bug and code style (apache#4842)
  ---------
  Co-authored-by: 毕博 <[email protected]>
* [Improve][Zeta] async execute checkpoint trigger and other block method (apache#4846)
  * [Improve][Zeta] async execute checkpoint trigger
  * [Bug][Zeta] Fix zeta cannot normally recycle thread belong to abnormal tasks
  * [Improve][Zeta] Move `restoreState` add `addSplitsBack` execute by TaskExecuteService
  * [Improve][Zeta] Move `receivedReader` execute by TaskExecuteService
* [Bug][Zeta] Fix task `notifyTaskStatusToMaster` failed when job not running or failed before run (apache#4847)
  * [Bug][Zeta] Fix task repeat notify failed when job not running
  * [Bug][Zeta] Fix notifyTaskStatusToMaster not release lock and NPE
* [Improve][Zeta] Reduce the frequency of fetching data from imap (apache#4851)
* [Improve][Zeta] Add Metaspace size default value to config file (apache#4848)
* [Improve][Zeta] Speed up listAllJob function (apache#4852)
* [Bug][Zeta] Fix TaskGroupContext always hold classloader so classloader can't recycle (apache#4849)
* [Improve][Zeta] Fix engine runtime error (apache#4850)
* [Hotfix][Zeta] Fix completePendingCheckpoint concurrent action (apache#4854)
  This operation does not allow concurrent execution
* [Hotfix][Zeta] Fix master active bug (apache#4855)
* [Bugfix][DAG] Fix the incorrect setting of transform parallelism (apache#4814)
* [Hotfix][Zeta] fix pipeline state not right bug (apache#4823)
* [BUG][Doris] Add a jobId to the doris label to distinguish between tasks (apache#4853)
  Co-authored-by: zhouyao <[email protected]>
* [Improve] Add a jobId to the doris label to distinguish between tasks (apache#4839)
  Co-authored-by: zhouyao <[email protected]>
* [Hotfix][Zeta] Fix IMap operation timeout bug (apache#4859)
* [Bug][Zeta] Fix restoreComplete Future can't be completed when cancel task (apache#4863)
* [Feature][SQL Transform] Add catalog support for SQL Transform plugin (apache#4819)
* [improve][SelectDB] Add a jobId to the selectDB label to distinguish between tasks (apache#4864)
  Co-authored-by: zhouyao <[email protected]>
* [Hotfix][Connector-v2][kafka] Fix the short interval of pull data settings and revise the format (apache#4875)
* [Bug][Connector-V2][Doris] update last checkpoint id when doing snapshot (apache#4881)
* [Hotfix][Zeta] Fix deploy operation timeout but task already finished bug (apache#4867)
* [Core][Docs] Remove incubator in README file (apache#4882)
* [Bugfix][CDC Base] Solving the ConcurrentModificationException caused by snapshotState being modified concurrently. (apache#4877)
* [improve][CDC base] Implement Sample-based Sharding Strategy with Configurable Sampling Rate (apache#4856)
* [Improve][Zeta] Reduce the operation count of imap_running_job_metrics (apache#4861)
* [Bug][Zeta] Fix TaskExecutionService will return not active ExecutionContext (apache#4869)
* [Hotfix][Jdbc] Fix XA DataSource crash (Oracle/Dameng/SqlServer) (apache#4866)
* [Bugfix] [Connector-V2] [File] Fix read temp file (apache#4876)
  Co-authored-by: wantao <[email protected]>
* [Bug][Zeta] Fix TaskExecutionService synchronized lock will not release (apache#4886)
* [Improve][Zeta] Move driver into lib directory and change operation count (apache#4845)
* [hotfix][kafka] Fix the problem that the partition information cannot be obtained when kafka is restored (apache#4764)
* [Bugfix][zeta] Fix the deadlock issue with JDBC driver loading (apache#4878)
* [Chore] update 2.3.2 release-note.md (apache#4892)
* [Improve][Connector-V2][Jdbc-Source] Support for Decimal types as split keys (apache#4634)
  * [Improve][Connector-V2][Jdbc-Source] Support Compatible Mysql bigint(20) used as a partition_column apache#4634
  Co-authored-by: zhilinli <[email protected]>
* [Bug][connector-v2][doris] add streamload Content-type for doris URLdecode error (apache#4880)
* [Chore] Change repository name from incubator-seatunnel to seatunnel (apache#4868)
  ---------
  Co-authored-by: Jia Fan <[email protected]>
* [Improve][connector-V2-Neo4j] Supports neo4j sink batch write and update docs (apache#4841)
* [Hotfix][connector-v2][e2e] Fix maven scope (apache#4901)
* quick-start-seatunnel-engine.md (apache#4943)
* fix error (apache#4888)
* [Hotfix][Connector-V2][ClickhouseFile] Fix ClickhouseFile write file failed when field value is null (apache#4937)
  * Update ClickhouseFileSinkWriter.java
    Bug fix: when ClickhouseFileSinkWriter writes to a temporary file, it does not check whether the field value is empty, so an exception will be thrown. Modified to write an empty string when a null value is encountered
  * Update ClickhouseFileSinkWriter.java
    repair code style
  * Update ClickhouseFileSinkWriter.java
    code style
* [Improve][Zeta] Add an interface for batch retrieval of JobMetrics (apache#4576)
* [Improve] Documentation and partial word optimization. (apache#4936)
  * code format
  * add cdc feature
  * fix cdc can not get driver error
  ---------
  Co-authored-by: gdliu3 <[email protected]>
* [Doc][Connector-V2] StarRocks `nodeUrls` property name fix (apache#4951)
  node_urls -> nodeUrls; node_urls doesn't work
* [Feature][E2E][FtpFile] add ftp file e2e test case (apache#4647)
  * [WIP][Feature][Connector-e2e] add ftp e2e test
  * Let e2e barely execute by excluding the commons-net jar package.
  * Resolve the maven conflict
  ---------
  Co-authored-by: hailin0 <[email protected]>
* [Hotfix][Connector-V2][StarRocks] Fix code style (apache#4966)
* [Hotfix][Connector-v2][HbaseSink] Fix default timestamp (apache#4958)
* [Doc] Change the transform website url (apache#4954)
* [Docs][Connector-V2][Http] Reconstruct the Http connector document (apache#4962)
  Co-authored-by: chenzy15 <[email protected]>
* [Feature][connector-v2][mongodb] mongodb support cdc sink (apache#4833)
* [Bug][zeta][starter] fix bug (apache#4983) (apache#4984)
  Co-authored-by: wsstony <[email protected]>
* fix redis nodes format error. (apache#4981)
  Co-authored-by: lightzhao <[email protected]>
* [Improve][CDC] Remove driver for cdc connector (apache#4952)
* [Hotfix][Connector-V2][Mongodb] Fix document error content and remove redundant code (apache#4982)
  Co-authored-by: chenzy15 <[email protected]>
* [Improve][Connector-V2][OSS-Jindo] Optimize jindo oss connector (apache#4964)
  * [Improve][Connector-V2][Jindo-Oss] Optimize jindo-oss connector
  * [Improve][Connector-V2][Jindo-Oss] Update module name
  * [Hotfix][Connector-V2][StarRocks] Fix code style
* [bugfix] Upgrade the key log output level (apache#4993)
* [Feature][Zeta] Configuration files support user variable replacement (apache#4969)
* [Feature][Transform-V2][SQL] Support 'select *' and 'like' clause for SQL Transform plugin (apache#4991)
  Co-authored-by: mcy <[email protected]>
* [Improve][CDC] change driver scope to provider (apache#5002)
* [Hotfix][Connector-V2][Hive] Support user-defined hive-site.xml (apache#4965)
* [Improve][Connector-v2][Mongodb] Optimize reading logic (apache#5001)
  Co-authored-by: chenqqq11 <[email protected]>
* [Feature][Connector-V2][Clickhouse] clickhouse writes with checkpoints (apache#4999)
* [Hotfix][Connector-V2][Mongodb] Compatible with historical parameters (apache#4997)
* Split updated modules integration test for part 4 (apache#5028)
* [Hotfix] Fix the CI Job name error (apache#5032)
* [Feature][CDC] Support disable/enable exactly once for INITIAL (apache#4921)
* [bugfix][zeta] Fixed multi-table job data loss and latency issues (apache#149) (apache#5031)
* [Hotfix][CDC] Fix jdbc connection leak for mysql (apache#5037)
* [Bugfix][zeta] Fix cdc connection does not close (apache#4922)
* Fix XA Transaction bug (apache#5020)
* Set Up with Kubernetes, dockerfile document error in constructing docker image (apache#5022)
  Co-authored-by: yctan <[email protected]>
* [Improve][Connector-v2][Mongodb] sink support transaction update/writing (apache#5034)
* fix: the HdfsStorage can not delete checkpoint file apache#5046 (apache#5054)
* [BugFix] [Connector-V2] [MySQL-CDC] serverId from int to long (apache#5033) (apache#5035)
  * [bugfix] change MySQL CDC serverId from int to long (apache#5033)
  * style: 🎨 optimize code style
* [Feature][Connector-V2][cdc] Change the time zone to the default time zone (apache#5030)
* [Bugfix][connector-cdc-mysql] Fix listener not released when BinlogClient reuse (apache#5011)
* [Feature][Connector-V2][Jdbc] Add oceanbase dialect factory (apache#4989)
  ---------
  Co-authored-by: silenceland <[email protected]>
  Co-authored-by: changhuyan <[email protected]>
* [HotFix][Zeta] fix after the savepoint job is restored, the checkpoint file cannot be generated apache#4985 (apache#5051)
  * fix after the savepoint job is restored, the checkpoint file cannot be generated
* fix class not found exception (apache#5063)
* [Feature] update action config to support run CI on fork repo (apache#5065)
* [Bugfix] fix clickhouse source connector read Nullable() type is not null, example: Nullable(Float64) while value is null the result is 0.0 (apache#5080)
* [Feature][Connector-V2][Clickhouse] Add clickhouse connector time zone key, default system time zone (apache#5078)
  * Add clickhouse connector time zone key, default system time zone
  * Modify the document and add clickhouse server_time_zone configuration
* [Chore] Modify repeat des (apache#5088)
  Co-authored-by: 80597928 <[email protected]>
* [Docs] Add Value types in Java to Schema feature (apache#5087)
* [Feature][Connector-V2] JDBC source support string type as partition key (apache#4947)
* [HotFix] Fix code style (apache#5092)
* [Docs][Zeta] Add savepoint doc (apache#5081)
* [Feature][connector-v2][mongodbcdc] Support source mongodb cdc (apache#4923)
* [Improve] Improve savemode api (apache#4767)
* [Doc] Improve DB2 Source Vertica Source & DB2 Sink Vertica Sink document (apache#5102)
* [Improve][Docs][Clickhouse] Reconstruct the clickhouse connector doc (apache#5085)
  ---------
  Co-authored-by: chenzy15 <[email protected]>
* [Pom] update version to 2.3.3-SNAPSHOT (apache#5043)
  * update version to 2.3.3-SNAPSHOT
  * update dependency version in known dependencies file
* Add logs to find job restore from master active switch error
* [Feature][Connector-V2][mysql cdc] Conversion of tinyint(1) to bool is supported (apache#5105)
  Co-authored-by: zhouyao <[email protected]>
* [Improve][Zeta] Add sleep for Task to reduce CPU cost (apache#5117)
* [Feature][JDBC Sink] Add DM upsert support (apache#5073)
  ---------
  Co-authored-by: David Zollo <[email protected]>
* [Hotfix][Connector][Jdbc] Fix the problem of JdbcOutputFormat database connection leak (apache#4802)
* [Hotfix] Fix mongodb cdc e2e instability (apache#5128)
  Co-authored-by: chenzy15 <[email protected]>
* [Hotfix][Zeta] Fix task state memory leak (apache#5139)
* [Hotfix][Zeta] Fix checkpoint error report without msg (apache#5137)
* [Improve][Zeta] Improve CheckpointCoordinator notify complete when restore (apache#5136)
  * [Improve] Improve CheckpointCoordinator notify complete when restore
  * update
* [Improve][Zeta] Improve CheckpointCoordinator log error when report error from task (apache#178) (apache#5134)
* [Hotfix][Zeta] Fix MultipleTableJobConfigParser ignore env option (apache#5067)
  * [Fix][Zeta] Fix MultipleTableJobConfigParser ignore env option
  * update
* [Improve][Connector][File] Optimize files commit order (apache#5045)
  Before, a `HashMap` was used to store file paths, so checkpoint files were committed out of order. Now a `LinkedHashMap` is used to ensure that files are committed in the order they were generated.
* [Hotfix][Mongodb cdc] Solve startup resume token is negative (apache#5143)
  ---------
  Co-authored-by: chenzy15 <[email protected]>
* [Feature][connector][kafka] Support read debezium format message from kafka (apache#5066)
* [Feature][CDC] Support tables without primary keys (with unique keys) (apache#163) (apache#5150)
* [Feature][Connector-V2][CDC] Support string type shard fields. (apache#5147)
  * [feature][CDC base] Supports string type shard fields
  * Delete invalid code
* [Feature][Connector-V2][File] Add cos source&sink (apache#4979)
  * [Feature][Connector-V2][File] Add cos sink
  * update doc&e2e and add pom file header
  * add e2e file header and config
  * add file-cos module into dist pom.xml
  * [Feature][Connector-V2][File] Add cos source
  ---------
  Co-authored-by: dengd1937 <[email protected]>
* [Fix][Zeta] Fix SinkFlowLifeCycle without init lastCommitInfo (apache#5152)
* [Hotfix][MongodbCDC] Refine data format to adapt to universal logic (apache#5162)
  Co-authored-by: chenzy15 <[email protected]>
* [Chore] Update bug-report.yml (apache#5160)
* [Improve][CDC] support exactly-once of cdc and fix the BinlogOffset comparing bug (apache#5057)
  * [Improve][CDC] support exactly-once of cdc, fix the BinlogOffset comparing bug
  * [Improve][CDC] adjust code style
  * [Improve][CDC] fix ci error
  ---------
  Co-authored-by: happyboy1024 <[email protected]>
* [Docs][Connector-V2][Hudi] Reconstruct the Hudi connector document (apache#4905)
  ---------
  Co-authored-by: zhouyao <[email protected]>
* [Docs][Connector-V2][Doris] Reconstruct the Doris connector document (apache#4903)
  ---------
  Co-authored-by: zhouyao <[email protected]>
* [improve] [CDC Base] Add some split parameters to the optionRule (apache#5161)
* [bugfix] [File Base] Fix Hadoop Kerberos authentication related issues. (apache#5171)
* [CI] add code style check when docs changed (apache#5183)
* [Bug][Translation][Spark] Fix SeaTunnelRowConvertor fail to convert when schema contains row type. (apache#5170)
* [Improve][Zeta] Move checkpoint notify complete in checkpoint stage (apache#5185)
* [Feature][Catalog] Add JDBC Catalog auto create table (apache#4917)
* [Feature][Connector V2][File] Add config of 'file_filter_pattern', which is used for filtering files. (apache#5153)
* [Improve][Connector-v2][Jdbc] check url not null throw friendly message (apache#5097)
  * check url not null throw friendly message
  * check jdbc source config
  * modify jdbc validate method
  ---------
  Co-authored-by: 80597928 <[email protected]>
  Co-authored-by: 80597928 <[email protected]>
* [bugfix][zeta] Fix the issue of two identical IDs appearing when executing seatunnel.sh -l as the job resumes (apache#5191)
* [Improve][Docs][Kafka] Reconstruct the kafka connector document (apache#4778)
  ---------
  Co-authored-by: chenzy15 <[email protected]>
* [Bug][Improve][LocalFileSink] Fix LocalFile Sink file_format_type. (apache#5118)
* [Bug][connector-v2] PostgreSQL versions below 9.5 are compatible use cdc sync problem (apache#5120)
* [e2e] kafka e2e error (apache#5200)
* [Hotfix][Connector-V2][JindoOssFile] Fix plugin-mapping.properties (apache#5215)
  Co-authored-by: tyrantlucifer <[email protected]>
* [Improve][Zeta] Don't trigger handleSaveMode when restore (apache#5192)
* move imap storage file dependency packages to submodules (apache#5218)
* [Hotfix][CI] Declare files that will always have UNIX line endings on checkout. (apache#5221)
* [Hotfix][Connector-V2][Paimon] Bump paimon-bundle version to 0.4.0-incubating (apache#5219)
* [Docs][Connector-V2][PostgreSQL] Refactor connector-v2 docs using unified format PostgreSQL apache#4590 (apache#4757)
* [Docs] Fix Dockerfile and seatunnel-flink.yaml in Set Up with Kubernetes (apache#4793)
  * [Docs] update seatunnel-flink.yaml and Dockerfile to help the demo work
  * [Docs] update release-note apache#4788
  ---------
  Co-authored-by: flynnxue <[email protected]>
  Co-authored-by: ic4y <[email protected]>
* [feature][doris] Doris factory type (apache#5061)
  * [feature][doris] Web need factory and data type convertor
* [Fix] Update the Readme (apache#4968)
  Use the better description for the SeaTunnel Project
* [CI] Split updated modules integration test for part 5 (apache#5208)
  * Split e2e
  * update json-smart
  * fix dm error
  * fix dm error
  * fix dm error
  * fix dm error
  * fix dm error
  * fix dm error
  * fix dm error
  * revert code
  * revert code
  * revert code
  ---------
  Co-authored-by: gdliu3 <[email protected]>
* [Feature][CDC][Zeta] Support schema evolution framework (DDL) (apache#5125)
* Fixed IMap file storage e2e bug (apache#5237)
* [Improve] [Connector-V2] Remove scheduler in JDBC sink apache#4736 (apache#5168)
  ---------
  Co-authored-by: gdliu3 <[email protected]>
* [Doc] [JDBC Oracle] Add JDBC Oracle Documentation (apache#5239)
* [Feature][Zeta][REST-API] Add REST API To Submit Job (apache#5107)
* [Fix] Update the project description (apache#4967)
* [Feature][Zeta] Support history service record job execute error (apache#5114)
  * fix: hdfs Checkpoint Storage management fails to delete historical files
  * fix: hdfs Checkpoint Storage management fails to delete historical files
  * fix after the savepoint job is restored, the checkpoint file cannot be generated
  * [Feature][Zeta] Support history service record job execute error
  * Improve Jobstate-related class additions, add serialVersionUID
  * add e2e test
* [hotfix] Update .asf.yaml (apache#5242)
* [Hotfix] Fix array index anomalies caused by apache#5057 (apache#5195)
* [bugfix] [savepoint test] Turn on the testSavepoint test. (apache#5199)
* [BUG][Connector-V2][Jdbc] support postgresql json type (apache#5194)
  * add Postgresql json type
  Co-authored-by: 80597928 <[email protected]>
* [Bugfix][cdc] Fix mysql bit column to java byte (apache#4817)
* [Bugfix][AmazonDynamoDB] Fix the problem that all table data cannot be obtained (apache#5146)
* [Docs][Connector][Source][jdbc] Change the line boundary store value type to BigDecimal (apache#4900)
* [bug][jdbc][oracle] Fix the Oracle number type mapping problem (apache#5209)
* [Bugfix][zeta] Fix the serialization issue of GetMetricsOperation during multi-node operation. (apache#5206)
* [Hotfix][Zeta] Avoid Redundant Job Submissions by Checking Job Status (apache#5229)
* [Bugfix][zeta] Fixed the issue of duplicated metrics caused by job fault tolerance or restore. (apache#5214)
* [Improve] [CDC Base] Add a fast sampling method that supports character types (apache#5179)
* fixed zeta ci error (apache#5254)
* [Doc][README] Remove useless github workflow, and adjust description of 'engineering structure'. (apache#4305)
* [Feature][Zeta] The expiration time of a historical Job can be configured (apache#5180)
  * fix: hdfs Checkpoint Storage management fails to delete historical files
  Co-authored-by: hailin0 <[email protected]>
* [bugfix] [e2e] Fixed a minor bug (apache#5274)
* [Improve][SQL] Support use catalogTableName as SQL expression (apache#5273)
* [Doc] Improve S3File Source & S3File Sink document (apache#5101)
* Fix style error (apache#5280)
* Fix StarRocksJsonSerializer will transform array/map/row to string (apache#5281)
* [Docs][Connector-V2][MyHours] Reconstruct the MyHours connector document (apache#5129)
  * fix format
  * fix format
* [Improve][API & Zeta] Using connector custom serializer encode/decode states (apache#5238)
  * API: Using DefaultSerializer as connector sink default serializer
  * Zeta: Using connector custom serializer encode/decode states
* [Feature][Connector-V2] connector-kafka source support data conversion extracted by kafka connect source (apache#4516)
  * Compatible kafka connect json apache#4137
* [Improve][CI/CD] Remove 'paths-ignore', enable the code style check for markdown files. (apache#5286)
* [Bugfix][zeta] Resolved the issue causing checkpoints to halt on tolerable-failure=0. (apache#5263)
  * remove max-concurrent
* [Feature][Connector-v2][RedisSink] Support redis to set expiration time. (apache#4975)
  * Support redis to set expiration time.
  * Set redis expire default value.
  * add e2e test.
  * add e2e test.
  * modify config file name.
  ---------
  Co-authored-by: lightzhao <[email protected]>
* [bugfix] Fix testGetErrorInfo case error (apache#5282)
* [Feature][Zeta] Checkpoint support hdfs ha mode (apache#4942)
* fix browser long type intercept (apache#5267)
  Co-authored-by: 80597928 <[email protected]>
* [Docs] remove `incubating` keyword in document (apache#5257)
* [feature][web] hive add option because web need (apache#5154)
  * [feature][web] hive add option read_columns
  * [feature][web] required update optional
  * [bugfix] mvn spotless
  * fix conf
  * fix conf
  ---------
  Co-authored-by: liuli <[email protected]>
* [Bug][flink-runtime][connectors-v2] Flink register table Environment: the running mode is set to `job.mode` (apache#4826)
* [Docs][Connector-V2][StarRocks] Reconstruct the StarRocks connector document (apache#5132)
* [Improve][Connector-v2][HiveSink] remove drop partition when abort. (apache#4940)
  Co-authored-by: lightzhao <[email protected]>
  Co-authored-by: liuli <[email protected]>
  Co-authored-by: ic4y <[email protected]>
* [Docs][Connector-V2][SelectDB-Cloud] Reconstruct the SelectDB-Cloud connector document (apache#5130)
  * fix codestyle
  ---------
  Co-authored-by: liuli <[email protected]>
* [Docs][Connector-V2][HDFS] Refactor connector-v2 docs using unified format HDFS. (apache#4871)
  * Refactor connector-v2 docs using unified format HDFS.
  * add data type.
  * update.
  * add key feature.
  * add hdfs_site_path
  * 1. add data type. 2. add hdfs_site_path conf.
  * add data type.
  * add hdfs site conf.
  ---------
  Co-authored-by: lightzhao <[email protected]>
  Co-authored-by: liuli <[email protected]>
* [Improve] [Connector-V2] Remove scheduler in Tablestore sink (apache#5272)
  ---------
  Co-authored-by: gdliu3 <[email protected]>
* [BUG][Connector-V2][Mongo-cdc] Incremental data kind error in snapshot phase (apache#5184)
* [Hotfix] Fix com.google.common.base.Preconditions to seatunnel shade one (apache#5284)
* [Merge] Fix merge conflict and fix jdbc fieldIde with compatibleMode confusion

---------

Co-authored-by: Cason-ACE <[email protected]>
Co-authored-by: Tyrantlucifer <[email protected]>
Co-authored-by: hailin0 <[email protected]>
Co-authored-by: Xiaojian Sun <[email protected]>
Co-authored-by: Laglangyue <[email protected]>
Co-authored-by: ZhilinLi <[email protected]>
Co-authored-by: ic4y <[email protected]>
Co-authored-by: Hao Xu <[email protected]>
Co-authored-by: Eric <[email protected]>
Co-authored-by: Bibo <[email protected]>
Co-authored-by: 毕博 <[email protected]>
Co-authored-by: Carl-Zhou-CN <[email protected]>
Co-authored-by: zhouyao <[email protected]>
Co-authored-by: Marvin <[email protected]>
Co-authored-by: monster <[email protected]>
Co-authored-by: gnehil <[email protected]>
Co-authored-by: TaoZex <[email protected]>
Co-authored-by: xiaofan2012 <[email protected]>
Co-authored-by: wantao <[email protected]>
Co-authored-by: Guangdong Liu <[email protected]>
Co-authored-by: zhilinli <[email protected]>
Co-authored-by: zhaifengbing <[email protected]>
Co-authored-by: dalong <[email protected]>
Co-authored-by: FuYouJ <[email protected]>
Co-authored-by: davidfans <[email protected]>
Co-authored-by: Fan Donglai <[email protected]>
Co-authored-by: gdliu3 <[email protected]>
Co-authored-by: DismalSnail <[email protected]>
Co-authored-by: lightzhao <[email protected]>
Co-authored-by: chenzy15 <[email protected]>
Co-authored-by: wssmao <[email protected]>
Co-authored-by: wsstony <[email protected]>
Co-authored-by: lightzhao <[email protected]>
Co-authored-by: XiaoJiang521 <[email protected]>
Co-authored-by: mcy <[email protected]>
Co-authored-by: yctanGmail <[email protected]>
Co-authored-by: yctan <[email protected]>
Co-authored-by: wu-a-ge <[email protected]>
Co-authored-by: 司马琦昂 <[email protected]>
Co-authored-by: happyboy1024 <[email protected]>
Co-authored-by: He Wang <[email protected]>
Co-authored-by: silenceland <[email protected]>
Co-authored-by: changhuyan <[email protected]>
Co-authored-by: Jarvis <[email protected]>
Co-authored-by: 阿丙 <[email protected]>
Co-authored-by: jackyyyyyssss <[email protected]>
Co-authored-by: 80597928 <[email protected]>
Co-authored-by: Chengyu Yan <[email protected]>
Co-authored-by: zhangchengming601 <[email protected]>
Co-authored-by: lihjChina <[email protected]>
Co-authored-by: David Zollo <[email protected]>
Co-authored-by: EchoLee5 <[email protected]>
Co-authored-by: dengdi <[email protected]>
Co-authored-by: dengd1937 <[email protected]>
Co-authored-by: happyboy1024 <[email protected]>
Co-authored-by: FlechazoW <[email protected]>
Co-authored-by: 80597928 <[email protected]>
Co-authored-by: kun <[email protected]>
Co-authored-by: Volodymyr <[email protected]>
Co-authored-by: javalover123 <[email protected]>
Co-authored-by: Volodymyr <[email protected]>
Co-authored-by: kksxf <[email protected]>
Co-authored-by: flynnxue <[email protected]>
Co-authored-by: fang <[email protected]>
Co-authored-by: gejinxin <[email protected]>
Co-authored-by: Wenjun Ruan <[email protected]>
Co-authored-by: Koyfin <[email protected]>
Co-authored-by: liuli <[email protected]>
1 parent 17cf8a8 commit e2ef3ad

File tree

359 files changed: +12790 -2587 lines changed


.asf.yaml

Lines changed: 7 additions & 5 deletions

@@ -15,18 +15,20 @@
 #
 
 github:
-  description: SeaTunnel is a distributed, high-performance data integration platform for the synchronization and transformation of massive data (offline & real-time).
+  description: SeaTunnel is a next-generation super high-performance, distributed, massive data integration tool.
   homepage: https://seatunnel.apache.org/
   labels:
     - data-integration
+    - change-data-capture
+    - cdc
     - high-performance
     - offline
     - real-time
-    - data-pipeline
-    - sql-engine
+    - batch
+    - streaming
+    - data-ingestion
     - apache
-    - seatunnel
-    - etl-framework
+    - elt
   enabled_merge_buttons:
     squash: true
     merge: false

.gitattributes

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+*.sh text eol=lf
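The `*.sh text eol=lf` attribute above (apache#5221) forces shell scripts to keep Unix line endings on checkout even on Windows clones. The reason this matters: a script checked out with CRLF endings makes the kernel look for an interpreter literally named `/bin/bash\r`, which does not exist. The `EolCheck` class below is an illustrative sketch, not project code:

```java
// Sketch: why CRLF line endings break shell scripts on Linux.
public class EolCheck {
    // Returns true when the content contains Windows-style line endings.
    static boolean hasCrlf(String content) {
        return content.contains("\r\n");
    }

    public static void main(String[] args) {
        // A script as it would look after a checkout without the .gitattributes rule.
        String script = "#!/bin/bash\r\necho hello\r\n";
        System.out.println(hasCrlf(script)); // true -> shebang would fail on Linux

        // Normalizing to LF is what `text eol=lf` guarantees at checkout time.
        String normalized = script.replace("\r\n", "\n");
        System.out.println(hasCrlf(normalized)); // false
    }
}
```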

.github/ISSUE_TEMPLATE/bug-report.yml

Lines changed: 3 additions & 3 deletions

@@ -90,10 +90,10 @@ body:
 
   - type: textarea
     attributes:
-      label: Flink or Spark Version
-      description: Provide Flink or Spark Version.
+      label: Zeta or Flink or Spark Version
+      description: Provide Zeta or Flink or Spark Version.
       placeholder: >
-        Please provide the version of Flink or Spark.
+        Please provide the version of Zeta or Flink or Spark.
     validations:
       required: false
 

.github/workflows/backend.yml

Lines changed: 136 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
name: Backend
1919
on:
2020
push:
21+
pull_request:
2122
branches:
2223
- business-dev
2324
- "v[0-9]+.[0-9]+.[0-9]+-release"
@@ -26,8 +27,6 @@ on:
2627
- business-dev
2728
- "v[0-9]+.[0-9]+.[0-9]+-release"
2829
paths-ignore:
29-
- 'docs/**'
30-
- '**/*.md'
3130
- 'seatunnel-ui/**'
3231

3332
concurrency:
@@ -270,7 +269,7 @@ jobs:
270269
- name: run updated modules integration test (part-1)
271270
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
272271
run: |
273-
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 2 0`
272+
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 5 0`
274273
./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
275274
env:
276275
MAVEN_OPTS: -Xmx2048m
@@ -295,7 +294,7 @@ jobs:
295294
- name: run updated modules integration test (part-2)
296295
if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
297296
run: |
298-
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 2 1`
297+
sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 5 1`
299298
if [ ! -z $sub_modules ]; then
300299
./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
301300
else
@@ -304,6 +303,91 @@ jobs:
         env:
           MAVEN_OPTS: -Xmx2048m
 
+  updated-modules-integration-test-part-3:
+    needs: [ changes, sanity-check ]
+    if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        java: [ '8' ]
+        os: [ 'self-hosted' ]
+    timeout-minutes: 90
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+          cache: 'maven'
+      - name: run updated modules integration test (part-3)
+        if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
+        run: |
+          sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 5 2`
+          if [ ! -z $sub_modules ]; then
+            ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
+          else
+            echo "sub modules is empty, skipping"
+          fi
+        env:
+          MAVEN_OPTS: -Xmx2048m
+
+  updated-modules-integration-test-part-4:
+    needs: [ changes, sanity-check ]
+    if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        java: [ '8' ]
+        os: [ 'self-hosted' ]
+    timeout-minutes: 90
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+          cache: 'maven'
+      - name: run updated modules integration test (part-4)
+        if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
+        run: |
+          sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 5 3`
+          if [ ! -z $sub_modules ]; then
+            ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
+          else
+            echo "sub modules is empty, skipping"
+          fi
+        env:
+          MAVEN_OPTS: -Xmx2048m
+  updated-modules-integration-test-part-5:
+    needs: [ changes, sanity-check ]
+    if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        java: [ '8' ]
+        os: [ 'self-hosted' ]
+    timeout-minutes: 90
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+          cache: 'maven'
+      - name: run updated modules integration test (part-5)
+        if: needs.changes.outputs.api == 'false' && needs.changes.outputs.it-modules != ''
+        run: |
+          sub_modules=`python tools/update_modules_check/update_modules_check.py sub_update_it_module ${{needs.changes.outputs.it-modules}} 5 4`
+          if [ ! -z $sub_modules ]; then
+            ./mvnw -T 1C -B verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl $sub_modules -am -Pci
+          else
+            echo "sub modules is empty, skipping"
+          fi
+        env:
+          MAVEN_OPTS: -Xmx2048m
   engine-v2-it:
     needs: [ changes, sanity-check ]
     if: needs.changes.outputs.api == 'true'
@@ -637,6 +721,54 @@ jobs:
         env:
           MAVEN_OPTS: -Xmx4096m
 
+  jdbc-connectors-it-part-4:
+    needs: [ changes, sanity-check ]
+    if: needs.changes.outputs.api == 'true'
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        java: [ '8', '11' ]
+        os: [ 'ubuntu-latest' ]
+    timeout-minutes: 90
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+          cache: 'maven'
+      - name: run jdbc connectors integration test (part-4)
+        if: needs.changes.outputs.api == 'true'
+        run: |
+          ./mvnw -B -T 1C verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-jdbc-e2e-part-4 -am -Pci
+        env:
+          MAVEN_OPTS: -Xmx4096m
+
+  jdbc-connectors-it-part-5:
+    needs: [ changes, sanity-check ]
+    if: needs.changes.outputs.api == 'true'
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        java: [ '8', '11' ]
+        os: [ 'ubuntu-latest' ]
+    timeout-minutes: 90
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up JDK ${{ matrix.java }}
+        uses: actions/setup-java@v3
+        with:
+          java-version: ${{ matrix.java }}
+          distribution: 'temurin'
+          cache: 'maven'
+      - name: run jdbc connectors integration test (part-5)
+        if: needs.changes.outputs.api == 'true'
+        run: |
+          ./mvnw -B -T 1C verify -DskipUT=true -DskipIT=false -D"license.skipAddThirdParty"=true --no-snapshot-updates -pl :connector-jdbc-e2e-part-5 -am -Pci
+        env:
+          MAVEN_OPTS: -Xmx4096m
+
   kafka-connector-it:
     needs: [ changes, sanity-check ]
     if: needs.changes.outputs.api == 'true'
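The five `updated-modules-integration-test` jobs above each invoke `update_modules_check.py sub_update_it_module <modules> <parts> <index>` with `parts=5` and a distinct index, so the changed integration-test modules are fanned out across five runners. As a rough illustration only (not the script's actual implementation), such a fan-out can be a round-robin split of the comma-separated module list; the module names below are hypothetical:

```python
def split_it_modules(modules: str, parts: int, index: int) -> str:
    """Return the comma-joined slice of `modules` handled by job `index` of `parts`."""
    items = [m for m in modules.split(",") if m]
    # Round-robin bucket: item i goes to job (i % parts).
    return ",".join(items[index::parts])


if __name__ == "__main__":
    mods = "connector-a-e2e,connector-b-e2e,connector-c-e2e,connector-d-e2e,connector-e-e2e,connector-f-e2e"
    # Job part-1 of 5 (index 0) takes every 5th module starting at 0.
    print(split_it_modules(mods, 5, 0))  # prints "connector-a-e2e,connector-f-e2e"
```

The `if [ ! -z $sub_modules ]` guard in the workflow exists because a bucket can be empty when fewer than five modules changed.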

DISCLAIMER

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-Apache SeaTunnel (incubating) is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC.
+Apache SeaTunnel is an effort undergoing incubation at The Apache Software Foundation (ASF), sponsored by the Apache Incubator PMC.
 Incubation is required of all newly accepted projects until a further review indicates that the infrastructure,
 communications, and decision making process have stabilized in a manner consistent with other successful ASF projects.
 While incubation status is not necessarily a reflection of the completeness or stability of the code,

README.md

Lines changed: 16 additions & 42 deletions

@@ -3,7 +3,7 @@
 <img src="https://seatunnel.apache.org/image/logo.png" alt="seatunnel logo" height="200px" align="right" />
 
 [![Backend Workflow](https://github.com/apache/seatunnel/actions/workflows/backend.yml/badge.svg?branch=dev)](https://github.com/apache/seatunnel/actions/workflows/backend.yml)
-[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://the-asf.slack.com/archives/C053HND1D6X)
+[![Slack](https://img.shields.io/badge/slack-%23seatunnel-4f8eba?logo=slack)](https://s.apache.org/seatunnel-slack)
 [![Twitter Follow](https://img.shields.io/twitter/follow/ASFSeaTunnel.svg?label=Follow&logo=twitter)](https://twitter.com/ASFSeaTunnel)
 
 ---
@@ -13,9 +13,7 @@ SeaTunnel was formerly named Waterdrop , and renamed SeaTunnel since October 12,
 
 ---
 
-SeaTunnel is a very easy-to-use ultra-high-performance distributed data integration platform that supports real-time
-synchronization of massive data. It can synchronize tens of billions of data stably and efficiently every day, and has
-been used in the production of nearly 100 companies.
+SeaTunnel is a next-generation super high-performance, distributed, massive data integration tool. It can synchronize tens of billions of data stably and efficiently every day, and has been used in the production of many companies.
 
 ## Why do we need SeaTunnel
 
@@ -25,21 +23,20 @@ SeaTunnel focuses on data integration and data synchronization, and is mainly de
 - Complex synchronization scenarios: Data synchronization needs to support various synchronization scenarios such as offline-full synchronization, offline-incremental synchronization, CDC, real-time synchronization, and full database synchronization.
 - High demand in resource: Existing data integration and data synchronization tools often require vast computing resources or JDBC connection resources to complete real-time synchronization of massive small tables. This has increased the burden on enterprises to a certain extent.
 - Lack of quality and monitoring: Data integration and synchronization processes often experience loss or duplication of data. The synchronization process lacks monitoring, and it is impossible to intuitively understand the real-situation of the data during the task process.
-- Complex technology stack: The technology components used by enterprises are different, and users need to develop corresponding synchronization programs for different components to complete data integration.
-- Difficulty in management and maintenance: Limited to different underlying technology components (Flink/Spark) , offline synchronization and real-time synchronization often have be developed and managed separately, which increases the difficulty of the management and maintainance.
 
 ## Features of SeaTunnel
 
-- Rich and extensible Connector: SeaTunnel provides a Connector API that does not depend on a specific execution engine. Connectors (Source, Transform, Sink) developed based on this API can run on many different engines, such as SeaTunnel Engine, Flink, Spark that are currently supported.
-- Connector plugin: The plugin design allows users to easily develop their own Connector and integrate it into the SeaTunnel project. Currently, SeaTunnel has supported more than 70 Connectors, and the number is surging. There is the list of connectors we [supported and plan to support](https://github.com/apache/seatunnel/issues/3018).
+- Diverse Connectors: SeaTunnel has supported more than 100 Connectors, and the number is surging. Here is the list of connectors we [supported and plan to support](https://github.com/apache/seatunnel/issues/3018).
 - Batch-stream integration: Connectors developed based on SeaTunnel Connector API are perfectly compatible with offline synchronization, real-time synchronization, full- synchronization, incremental synchronization and other scenarios. It greatly reduces the difficulty of managing data integration tasks.
 - Support distributed snapshot algorithm to ensure data consistency.
-- Multi-engine support: SeaTunnel uses SeaTunnel Engine for data synchronization by default. At the same time, SeaTunnel also supports the use of Flink or Spark as the execution engine of the Connector to adapt to the existing technical components of the enterprise. In addition, SeaTunnel supports multiple versions of Spark and Flink.
+- Multi-engine support: SeaTunnel uses SeaTunnel Zeta Engine for data synchronization by default. At the same time, SeaTunnel also supports the use of Flink or Spark as the execution engine of the Connector to adapt to the existing technical components of the enterprise. In addition, SeaTunnel supports multiple versions of Spark and Flink.
 - JDBC multiplexing, database log multi-table parsing: SeaTunnel supports multi-table or whole database synchronization, which solves the problem of over-JDBC connections; supports multi-table or whole database log reading and parsing, which solves the need for CDC multi-table synchronization scenarios problems with repeated reading and parsing of logs.
 - High throughput and low latency: SeaTunnel supports parallel reading and writing, providing stable and reliable data synchronization capabilities with high throughput and low latency.
 - Perfect real-time monitoring: SeaTunnel supports detailed monitoring information of each step in the data synchronization process, allowing users to easily understand the number of data, data size, QPS and other information read and written by the synchronization task.
 - Two job development methods are supported: coding and canvas design. The SeaTunnel web project https://github.com/apache/seatunnel-web provides visual management of jobs, scheduling, running and monitoring capabilities.
 
+Besides, SeaTunnel provides a Connector API that does not depend on a specific execution engine. Connectors (Source, Transform, Sink) developed based on this API can run on many different engines, such as SeaTunnel Zeta Engine, Flink, Spark that are currently supported.
+
 ## SeaTunnel work flowchart
 
 ![SeaTunnel work flowchart](docs/en/images/architecture_diagram.png)
@@ -63,29 +60,15 @@ The default engine use by SeaTunnel is [SeaTunnel Engine](seatunnel-engine/READM
 
 ### Here's a list of our connectors with their health status.[connector status](docs/en/Connector-v2-release-state.md)
 
-## Environmental dependency
-
-1. java runtime environment, java >= 8
-
-2. If you want to run SeaTunnel in a cluster environment, any of the following Spark cluster environments is usable:
-
-- Spark on Yarn
-- Spark Standalone
-
-If the data volume is small, or the goal is merely for functional verification, you can also start in local mode without
-a cluster environment, because SeaTunnel supports standalone operation. Note: SeaTunnel 2.0 supports running on Spark
-and Flink.
-
-## Compiling project
-Follow this [document](docs/en/contribution/setup.md).
 
 ## Downloads
 
 Download address for run-directly software package : https://seatunnel.apache.org/download
 
 ## Quick start
+SeaTunnel uses SeaTunnel Zeta Engine as the runtime execution engine for data synchronization by default. We highly recommend utilizing Zeta engine as the runtime engine, as it offers superior functionality and performance. By the way, SeaTunnel also supports the use of Flink or Spark as the execution engine.
 
-**SeaTunnel Engine**
+**SeaTunnel Zeta Engine**
 https://seatunnel.apache.org/docs/start-v2/locally/quick-start-seatunnel-engine/
 
 **Spark**
@@ -101,6 +84,10 @@ https://seatunnel.apache.org/docs/start-v2/locally/quick-start-flink
 
 Weibo business uses an internal customized version of SeaTunnel and its sub-project Guardian for SeaTunnel On Yarn task
 monitoring for hundreds of real-time streaming computing tasks.
 
+- Tencent Cloud
+
+Collecting various logs from business services into Apache Kafka, some of the data in Apache Kafka is consumed and extracted through SeaTunnel, and then store into Clickhouse.
+
 - Sina, Big Data Operation Analysis Platform
 
 Sina Data Operation Analysis Platform uses SeaTunnel to perform real-time and offline analysis of data operation and
@@ -110,27 +97,11 @@ maintenance for Sina News, CDN and other services, and write it into Clickhouse.
 
 Sogou Qiqian System takes SeaTunnel as an ETL tool to help establish a real-time data warehouse system.
 
-- Qutoutiao, Qutoutiao Data Center
-
-Qutoutiao Data Center uses SeaTunnel to support mysql to hive offline ETL tasks, real-time hive to clickhouse backfill
-technical support, and well covers most offline and real-time tasks needs.
-
-- Yixia Technology, Yizhibo Data Platform
-
 - Yonghui Superstores Founders' Alliance-Yonghui Yunchuang Technology, Member E-commerce Data Analysis Platform
 
 SeaTunnel provides real-time streaming and offline SQL computing of e-commerce user behavior data for Yonghui Life, a
 new retail brand of Yonghui Yunchuang Technology.
 
-- Shuidichou, Data Platform
-
-Shuidichou adopts SeaTunnel to do real-time streaming and regular offline batch processing on Yarn, processing 3~4T data
-volume average daily, and later writing the data to Clickhouse.
-
-- Tencent Cloud
-
-Collecting various logs from business services into Apache Kafka, some of the data in Apache Kafka is consumed and extracted through SeaTunnel, and then store into Clickhouse.
-
 For more use cases, please refer to: https://seatunnel.apache.org/blog
 
 ## Code of conduct
@@ -140,14 +111,17 @@ By participating, you are expected to uphold this code. Please follow
 the [REPORTING GUIDELINES](https://www.apache.org/foundation/policies/conduct#reporting-guidelines) to report
 unacceptable behavior.
 
-## Developer
+## Contributors
 
 Thanks to [all developers](https://github.com/apache/seatunnel/graphs/contributors)!
 
 <a href="https://github.com/apache/seatunnel/graphs/contributors">
   <img src="https://contrib.rocks/image?repo=apache/seatunnel" />
 </a>
 
+## How to compile
+Please follow this [document](docs/en/contribution/setup.md).
+
 ## Contact Us
 
 * Mail list: **[email protected]**. Mail to `[email protected]`, follow the reply to subscribe
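The README's Quick start section above points at the Zeta engine getting-started guide. For orientation, a minimal batch job in SeaTunnel's HOCON config format looks roughly like the sketch below, modeled on the quick-start docs; treat the option names as illustrative and check them against the docs for your SeaTunnel version:

```hocon
env {
  parallelism = 1
  job.mode = "BATCH"
}

source {
  FakeSource {
    result_table_name = "fake"
    schema = {
      fields {
        name = "string"
        age = "int"
      }
    }
  }
}

sink {
  Console {}
}
```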

config/hazelcast.yaml

Lines changed: 1 addition & 0 deletions

@@ -38,3 +38,4 @@ hazelcast:
     hazelcast.tcp.join.port.try.count: 30
     hazelcast.logging.type: log4j2
     hazelcast.operation.generic.thread.count: 50
+
config/seatunnel.yaml

Lines changed: 1 addition & 2 deletions

@@ -17,6 +17,7 @@
 
 seatunnel:
   engine:
+    history-job-expire-minutes: 1440
     backup-count: 1
     queue-type: blockingqueue
     print-execution-info-interval: 60
@@ -26,8 +27,6 @@ seatunnel:
     checkpoint:
       interval: 10000
       timeout: 60000
-      max-concurrent: 1
-      tolerable-failure: 2
       storage:
         type: hdfs
         max-retained: 3
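For readability, the lines visible in the two hunks above combine into roughly the following engine block after the change (nesting and indentation reconstructed from the diff; surrounding keys omitted):

```yaml
seatunnel:
  engine:
    history-job-expire-minutes: 1440
    backup-count: 1
    queue-type: blockingqueue
    print-execution-info-interval: 60
    checkpoint:
      interval: 10000
      timeout: 60000
      storage:
        type: hdfs
        max-retained: 3
```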
