You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@seatunnel.apache.org by ri...@apache.org on 2022/01/16 11:47:23 UTC

[incubator-seatunnel] branch dev updated: [Feature][seatunnel-connector] Add tidb spark source (#927)

This is an automated email from the ASF dual-hosted git repository.

rickyhuo pushed a commit to branch dev
in repository https://gitbox.apache.org/repos/asf/incubator-seatunnel.git


The following commit(s) were added to refs/heads/dev by this push:
     new f984558  [Feature][seatunnel-connector] Add tidb spark source (#927)
f984558 is described below

commit f9845588ab37bb1b2530177f6bdd933e937be770
Author: zixi0825 <su...@gmail.com>
AuthorDate: Sun Jan 16 19:47:16 2022 +0800

    [Feature][seatunnel-connector] Add tidb spark source (#927)
    
    * tidb-connector
    
    * Delete application.conf
    
    * tidb-source
    
    * add tidb-spark-source
    
    * fix
    
    * Update Tidb.md
    
    * fix
    
    * fix
    
    Co-authored-by: zixi0825 <su...@linklogis.com>
---
 docs/en/spark/configuration/source-plugins/Tidb.md | 57 ++++++++++++++++++++++
 pom.xml                                            |  1 -
 .../org.apache.seatunnel.spark.BaseSparkSource     |  1 +
 .../org/apache/seatunnel/spark/source/Tidb.scala   | 39 +++++++++++++++
 4 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/docs/en/spark/configuration/source-plugins/Tidb.md b/docs/en/spark/configuration/source-plugins/Tidb.md
new file mode 100644
index 0000000..12fb601
--- /dev/null
+++ b/docs/en/spark/configuration/source-plugins/Tidb.md
@@ -0,0 +1,57 @@
+# Source plugin: Tidb
+
+### Description
+
+Read data from TiDB through TiSpark by running a Spark SQL query.
+
+### Env Options
+
+| name           | type   | required | default value |
+| -------------- | ------ | -------- | ------------- |
+| [spark.tispark.pd.addresses](#spark.tispark.pd.addresses-string)       | string | yes      | -             |
+| [spark.sql.extensions](#spark.sql.extensions-string)        | string | yes      | org.apache.spark.sql.TiExtensions         |
+
+##### spark.tispark.pd.addresses [string]
+
+Addresses of your PD (Placement Driver) servers in `host:port` form, for example `192.168.0.1:2379`.
+
+##### spark.sql.extensions [string]
+
+The Spark SQL extensions class used by TiSpark. Default value: `org.apache.spark.sql.TiExtensions`
+
+### Options
+
+| name           | type   | required | default value |
+| -------------- | ------ | -------- | ------------- |
+| [database](#database-string)       | string | yes      | -             |
+| [pre_sql](#pre_sql-string)        | string | yes      | -         |
+
+##### database [string]
+
+Name of the TiDB database to switch to (`use <database>`) before `pre_sql` is executed.
+
+##### pre_sql [string]
+
+The SQL query to execute; its result set becomes the data produced by this source.
+
+##### common options [string]
+
+Source Plugin common parameters, refer to [Source Plugin](./source-plugin.md) for details
+
+### Example
+
+```bash
+env {
+    spark.tispark.pd.addresses = "192.168.0.1:2379"
+    spark.sql.extensions = "org.apache.spark.sql.TiExtensions"
+}
+
+source {
+    tidb {
+        database = "test"
+        pre_sql = "select * from table1"
+    }
+}
+
+```
+
diff --git a/pom.xml b/pom.xml
index fd74733..c9dc72c 100644
--- a/pom.xml
+++ b/pom.xml
@@ -419,7 +419,6 @@
                 <artifactId>tispark-assembly</artifactId>
                 <version>${tispark.version}</version>
             </dependency>
-
             <dependency>
                 <groupId>org.apache.druid</groupId>
                 <artifactId>druid-indexing-service</artifactId>
diff --git a/seatunnel-connectors/seatunnel-connector-spark-tidb/src/main/resources/META-INF/services/org.apache.seatunnel.spark.BaseSparkSource b/seatunnel-connectors/seatunnel-connector-spark-tidb/src/main/resources/META-INF/services/org.apache.seatunnel.spark.BaseSparkSource
new file mode 100644
index 0000000..484f872
--- /dev/null
+++ b/seatunnel-connectors/seatunnel-connector-spark-tidb/src/main/resources/META-INF/services/org.apache.seatunnel.spark.BaseSparkSource
@@ -0,0 +1 @@
+org.apache.seatunnel.spark.source.Tidb
\ No newline at end of file
diff --git a/seatunnel-connectors/seatunnel-connector-spark-tidb/src/main/scala/org/apache/seatunnel/spark/source/Tidb.scala b/seatunnel-connectors/seatunnel-connector-spark-tidb/src/main/scala/org/apache/seatunnel/spark/source/Tidb.scala
new file mode 100644
index 0000000..5d067c7
--- /dev/null
+++ b/seatunnel-connectors/seatunnel-connector-spark-tidb/src/main/scala/org/apache/seatunnel/spark/source/Tidb.scala
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.seatunnel.spark.source
+
+import org.apache.seatunnel.common.config.CheckConfigUtil.check
+import org.apache.seatunnel.common.config.CheckResult
+import org.apache.seatunnel.spark.SparkEnvironment
+import org.apache.seatunnel.spark.batch.SparkBatchSource
+import org.apache.spark.sql.{Dataset, Row}
+
+class Tidb extends SparkBatchSource { // Spark batch source that runs the configured "pre_sql".
+  // Intentionally empty: this source performs no preparation step.
+  override def prepare(env: SparkEnvironment): Unit = {}
+  // Hands the "pre_sql" and "database" keys to CheckConfigUtil.check and returns its CheckResult.
+  override def checkConfig(): CheckResult = {
+    check(config, "pre_sql", "database")
+  }
+  // Issues `use <database>` on the session, then returns the Dataset produced by "pre_sql".
+  override def getData(env: SparkEnvironment): Dataset[Row] = {
+    val spark = env.getSparkSession
+    spark.sql("use " + config.getString("database")) // NOTE(review): name is concatenated unquoted
+    spark.sql(config.getString("pre_sql"))
+  }
+}