apache · corgy-w · Aug 26, 2025 · Aug 14, 2025 · Aug 16, 2025 · Aug 17, 2025
diff --git a/docs/en/connector-v2/source/Clickhouse.md b/docs/en/connector-v2/source/Clickhouse.md
@@ -18,6 +18,7 @@ import ChangeLog from '../changelog/connector-clickhouse.md';
 - [x] [column projection](../../concept/connector-v2-features.md)
 - [x] [parallelism](../../concept/connector-v2-features.md)
 - [x] [support user-defined split](../../concept/connector-v2-features.md)
+- [x] [support multiple table read](../../concept/connector-v2-features.md)
 
 > supports query SQL and can achieve projection effect.
 
@@ -56,14 +57,22 @@ They can be downloaded via install-plugin.sh or from the Maven central repositor
 | host              | String | Yes      | -                      | `ClickHouse` cluster address, the format is `host:port` , allowing multiple `hosts` to be specified. Such as `"host1:8123,host2:8123"` .                                                                                                                                                                    |
 | username          | String | Yes      | -                      | `ClickHouse` user username.                                                                                                                                                                                                                                                                                 |
 | password          | String | Yes      | -                      | `ClickHouse` user password.                                                                                                                                                                                                                                                                                 |
+| table_list        | Array  | NO       | -                      | The list of tables to be read.                                                                                                                                                                                                                                                                              |
+| clickhouse.config | Map    | No       | -                      | In addition to the above mandatory parameters that must be specified by `clickhouse-jdbc` , users can also specify multiple optional parameters, which cover all the [parameters](https://github.com/ClickHouse/clickhouse-jdbc/tree/master/clickhouse-client#configuration) provided by `clickhouse-jdbc`. |
+| server_time_zone  | String | No       | ZoneId.systemDefault() | The session time zone in database server. If not set, then ZoneId.systemDefault() is used to determine the server time zone.                                                                                                                                                                                |
+| common-options    |        | No       | -                      | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details.                                                                                                                                                                                          |
+
+Table list configuration:
+
+|       Name        |  Type  | Required |        Default         |                                                                                                                                                 Description                                                                                                                                                 |
+|-------------------|--------|----------|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | table_path        | String | NO       | -                      | The path to the full path of table, example: `default.table`                                                                                                                                                                                                                                                |
 | sql               | String | NO       | -                      | The query sql used to search data though Clickhouse server.                                                                                                                                                                                                                                                 |
 | filter_query      | String | NO       | -                      | Data filtering in Clickhouse. the format is "field = value", example : filter_query = "id > 2 and type = 1"                                                                                                                                                                                                 |
-| partition_list    | Array  | NO       | -                      | Table partition list to filter the specified partition. If it is a partitioned table, this field can be configured to filter the data of the specified partition. example: partition_list = ["20250615", "20250616"]                                                                                        || batch_size        | int    | NO       | 1024                   | The maximum rows of data that can be obtained by reading from Clickhouse once.                                                                                                                                                                                                                             |
+| partition_list    | Array  | NO       | -                      | Table partition list to filter the specified partition. If it is a partitioned table, this field can be configured to filter the data of the specified partition. example: partition_list = ["20250615", "20250616"]                                                                                        |
 | batch_size        | int    | NO       | 1024                   | The maximum rows of data that can be obtained by reading from Clickhouse once.                                                                                                                                                                                                                              |
-| clickhouse.config | Map    | No       | -                      | In addition to the above mandatory parameters that must be specified by `clickhouse-jdbc` , users can also specify multiple optional parameters, which cover all the [parameters](https://github.com/ClickHouse/clickhouse-jdbc/tree/master/clickhouse-client#configuration) provided by `clickhouse-jdbc`. |
-| server_time_zone  | String | No       | ZoneId.systemDefault() | The session time zone in database server. If not set, then ZoneId.systemDefault() is used to determine the server time zone.                                                                                                                                                                                |
-| common-options    |        | No       | -                      | Source plugin common parameters, please refer to [Source Common Options](../source-common-options.md) for details.                                                                                                                                                                                          |
+
+Note: When this configuration corresponds to a single table, you can flatten the configuration items in table_list to the outer layer.
 
 ## Parallel Reader
 The Clickhouse source connector supports parallel reading of data.
@@ -86,6 +95,7 @@ Use `table_path` to replace `sql` for single table reading.
 
 ## How to Create a Clickhouse Data Synchronization Jobs
 
+### Single Table
 The following example demonstrates how to create a data synchronization job that reads data from Clickhouse and prints it on the local client:
 
 **Case 1: Parallel reading based on the part read strategy**
@@ -183,6 +193,43 @@ sink {
 }
 ```
 
+### Multiple Tables
+```hocon
+env {
+  job.mode = "BATCH"
+  parallelism = 5
+}
+
+source {
+  Clickhouse {
+    host = "localhost:8123"
+    username = "xxx"
+    password = "xxx"
+    table_list = [
+      {
+        table_path = "default.table1"
+        sql = "select * from default.table1 where id > 2 and type = 1"
+      },
+      {
+        table_path = "default.table2"
+        sql = "select * from default.table2 where age > 18"
+      }
+    ]
+    server_time_zone = "UTC"
+    clickhouse.config = {
+      "socket_timeout": "300000"
+    }
+  }
+}
+
+# Console printing of the read Clickhouse data
+sink {
+  Console {
+    parallelism = 1
+  }
+}
+```
+
 ## Changelog
 
 <ChangeLog />
diff --git a/docs/zh/connector-v2/source/Clickhouse.md b/docs/zh/connector-v2/source/Clickhouse.md
@@ -18,6 +18,7 @@ import ChangeLog from '../changelog/connector-clickhouse.md';
 - [x] [列映射](../../concept/connector-v2-features.md)
 - [ ] [并行度](../../concept/connector-v2-features.md)
 - [ ] [支持用户自定义拆分](../../concept/connector-v2-features.md)
+- [x] [支持多表读](../../concept/connector-v2-features.md)
 
 > 支持查询SQL，可以实现投影效果。
 
@@ -51,54 +52,181 @@ import ChangeLog from '../changelog/connector-clickhouse.md';
 ## Source 选项
 
 |       名称                   |   类型    | 是否必须 |  默认值         |                                                                                                                                                 描述                                                                                                                                                 |
-|-------------------|--------|----------|------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| host              | String | 是      | -                      | `ClickHouse` 集群地址, 格式是`host:port` , 允许多个`hosts`配置. 例如 `"host1:8123,host2:8123"` .                                                                                                                                                                    |
-| database          | String | 是      | -                      | The `ClickHouse` 数据库名称.                                                                                                                                                                                                                                                                                  |
-| sql               | String | 是      | -                      | 用于通过Clickhouse服务搜索数据的查询sql.                                                                                                                                                                                                                                                 |
-| username          | String | 是      | -                      | `ClickHouse` user 用户账号.                                                                                                                                                                                                                                                                                 |
-| password          | String | 是      | -                      | `ClickHouse` user 用户密码.                                                                                                                                                                                                                                                                                 |
+|-------------------|--------|----------|------------------------|-----------------------------------------------------------------------------------|
+| host              | String | 是      | -                      | `ClickHouse` 集群地址, 格式是`host:port` , 允许多个`hosts`配置. 例如 `"host1:8123,host2:8123"` . |
+| username          | String | 是      | -                      | `ClickHouse` user 用户账号.                                                           |
+| password          | String | 是      | -                      | `ClickHouse` user 用户密码.                                                           |
+| table_list        | Array  | 否       | -                      | 要读取的数据表列表，支持配置多表.                                                                 |
 | clickhouse.config | Map    | 否       | -                      | 除了上述必须由 `clickhouse-jdbc` 指定的必填参数外，用户还可以指定多个可选参数，这些参数涵盖了 `clickhouse-jdbc` 提供的所有[参数](https://github.com/ClickHouse/clickhouse-jdbc/tree/master/clickhouse-client#configuration). |
 | server_time_zone  | String | 否       | ZoneId.systemDefault() | 数据库服务中的会话时区。如果未设置，则使用ZoneId.systemDefault（）设置服务时区.                                                                                                                                                                                |
 | common-options    |        | 否       | -                      | 源插件常用参数，详见 [源通用选项](../source-common-options.md).                                                                                                                                                                                          |
 
+多表配置：
+
+|       名称                   |   类型    | 是否必须 |  默认值         |                                                                                                                                                 描述                                                                                                                                                 |
+|----------------|--------|------|------|--------------------------------------------------------------------------------------|
+| table_path     | String | 否    | -    | 数据表的完整路径, 例如: `default.table`.                                                       |
+| sql            | String | 否    | -    | 用于通过Clickhouse服务搜索数据的查询sql.                                                          |
+| filter_query   | String | 否    | -    | 数据过滤条件. 格式为: "field = value", 例如 : filter_query = "id > 2 and type = 1"              |
+| partition_list | Array  | 否    | -    | 指定分区列表过滤数据. 如果是分区表，该字段可以配置为过滤指定分区的数据。例如: partition_list = ["20250615", "20250616"] |
+| batch_size     | int    | 否    | 1024 | 从Clickhouse读取一次可以获得的最大数据行数。                                                          |
+
+注意: 当此配置对应于单个表时，您可以将table_list中的配置项展平到外层。
+
+## 并行读取
+
+Clickhouse源连接器支持并行读取数据。
+
+当仅指定`table_path`参数时，连接器根据从`system.parts`系统表中获取的数据表的part文件实现并行读取。
+
+当仅指定`sql`参数时，连接器在集群的每个分片上基于本地表执行查询来实现并发读取。如果`sql`参数指定了一个分布式表，则会根据分布式表引擎的集群名获取分片列表执行并发读取。如果`sql`指定了一个本地表，那么`host`参数配置的节点列表将被视作集群分片列表执行并发读取。
+
+如果同时设置了`table_path`和`sql`参数，则将在sql模式下执行。推荐在指定`sql`参数时同时配置`table_path`参数以更好地识别表的元数据。
+
+## Tips
+当指定`table_path`参数时，如果不想读取整个表，可以指定`partition_list`或`filter_query`参数过滤指定条件或分区的数据。
+* `partition_list`: 过滤指定分区的数据
+* `filter_query`: 根据指定条件对数据进行过滤
+
+`batch_size`参数可用于控制每次查询读取的数据量，以避免在读取大量数据时出现OOM异常。适当增加这个值将有助于提高读取过程的性能。
+
+当读取单个表的数据时，建议使用`table_path`参数替代`sql`参数。
+
 ## 如何创建Clickhouse数据同步作业
 
-以下示例演示了如何创建数据同步作业，该做作业从Clickhouse读取数据并在本地客户端上打印:
+### 单表配置
+下面的示例演示了如何创建一个数据同步作业，该作业从Clickhouse读取数据并在本地客户端上打印数据：
+
+**案例1：基于part文件读取策略的并行读取**
+```hocon
+env {
+  job.mode = "BATCH"
+  parallelism = 5
+}
+
+source {
+  Clickhouse {
+    host = "localhost:8123"
+    username = "xxx"
+    password = "xxx"
+    table_path = "default.table"
+    server_time_zone = "UTC"
+    partition_list = ["20250615", "20250616"]
+    filter_query = "id > 2 and type = 1"
+    batch_size = 1024
+    clickhouse.config = {
+      "socket_timeout": "300000"
+    }
+  }
+}
+
+# Console printing of the read Clickhouse data
+sink {
+  Console {
+    parallelism = 1
+  }
+}
+```
 
-```bash
-# 设置要执行的任务的基本配置
+**案例2：基于SQL读取策略的并行读取**
+> 注意：SQL模式下的并行读取方式目前仅支持单表和where条件查询
+```hocon
 env {
-  parallelism = 10
   job.mode = "BATCH"
+  parallelism = 5
 }
 
-# 创建连接到Clickhouse的源
 source {
   Clickhouse {
     host = "localhost:8123"
-    database = "default"
-    sql = "select * from test where age = 20 limit 100"
-    username = "xxxxx"
-    password = "xxxxx"
+    username = "xxx"
+    password = "xxx"
+    table_path = "default.table"
     server_time_zone = "UTC"
-    plugin_output = "test"
+    sql = "select * from default.table where id > 2 and type = 1"
+    batch_size = 1024
     clickhouse.config = {
       "socket_timeout": "300000"
     }
   }
 }
 
-# 控制台打印读取的Clickhouse数据
+# Console printing of the read Clickhouse data
 sink {
   Console {
     parallelism = 1
   }
 }
 ```
 
-### 小提示
+**案例3：针对复杂SQL场景的单并发读取**
+
+当执行复杂SQL查询场景（例如带有join、group by、子查询等的查询）时，连接器将自动切换到单并发执行方式，即使配置了更高的并行度值。
+
+```hocon
+env {
+  job.mode = "BATCH"
+  parallelism = 1
+}
 
-> 1.[SeaTunnel 部署文档](../../start-v2/locally/deployment.md).
+source {
+  Clickhouse {
+    host = "localhost:8123"
+    username = "xxx"
+    password = "xxx"
+    server_time_zone = "UTC"
+    sql = "select t1.id, t2.category from default.table1 t1 global join default.table2 t2 on t1.id = t2.id where t1.age > 18"
+    batch_size = 1024
+    clickhouse.config = {
+      "socket_timeout": "300000"
+    }
+  }
+}
+
+# Console printing of the read Clickhouse data
+sink {
+  Console {
+    parallelism = 1
+  }
+}
+```
+
+### 多表配置
+```hocon
+env {
+  job.mode = "BATCH"
+  parallelism = 5
+}
+
+source {
+  Clickhouse {
+    host = "localhost:8123"
+    username = "xxx"
+    password = "xxx"
+    table_list = [
+      {
+        table_path = "default.table1"
+        sql = "select * from default.table1 where id > 2 and type = 1"
+      },
+      {
+        table_path = "default.table2"
+        sql = "select * from default.table2 where age > 18"
+      }
+    ]
+    server_time_zone = "UTC"
+    clickhouse.config = {
+      "socket_timeout": "300000"
+    }
+  }
+}
+
+# Console printing of the read Clickhouse data
+sink {
+  Console {
+    parallelism = 1
+  }
+}
+```
 
 ## 变更日志