You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by fo...@apache.org on 2023/01/04 03:32:11 UTC
[hudi] 01/45: [MINOR] Adapt to tianqiong spark
This is an automated email from the ASF dual-hosted git repository.
forwardxu pushed a commit to branch release-0.12.1
in repository https://gitbox.apache.org/repos/asf/hudi.git
commit ac0d1d81a48d1ce558294757a5812487cd9b2cf0
Author: XuQianJin-Stars <fo...@apache.com>
AuthorDate: Tue Aug 23 11:47:37 2022 +0800
[MINOR] Adapt to tianqiong spark
---
dev/settings.xml | 266 +++++++++++++++++++++
dev/tencent-install.sh | 157 ++++++++++++
dev/tencent-release.sh | 154 ++++++++++++
hudi-cli/pom.xml | 4 +-
hudi-client/hudi-spark-client/pom.xml | 4 +-
hudi-examples/hudi-examples-spark/pom.xml | 4 +-
hudi-integ-test/pom.xml | 4 +-
hudi-spark-datasource/hudi-spark-common/pom.xml | 12 +-
hudi-spark-datasource/hudi-spark/pom.xml | 12 +-
hudi-spark-datasource/hudi-spark2/pom.xml | 12 +-
hudi-spark-datasource/hudi-spark3-common/pom.xml | 2 +-
hudi-spark-datasource/hudi-spark3.1.x/pom.xml | 2 +-
hudi-spark-datasource/hudi-spark3.2.x/pom.xml | 6 +-
hudi-spark-datasource/hudi-spark3.3.x/pom.xml | 6 +-
hudi-sync/hudi-hive-sync/pom.xml | 4 +-
hudi-utilities/pom.xml | 10 +-
.../org/apache/hudi/utilities/UtilHelpers.java | 38 ++-
packaging/hudi-integ-test-bundle/pom.xml | 8 +-
pom.xml | 94 +++++---
19 files changed, 715 insertions(+), 84 deletions(-)
diff --git a/dev/settings.xml b/dev/settings.xml
new file mode 100644
index 0000000000..5f5dfd4fa6
--- /dev/null
+++ b/dev/settings.xml
@@ -0,0 +1,266 @@
+<settings>
+ <!-- Outbound proxy configuration: one entry for http and one for https, both pointing at
+ web-proxy.oa.com:8080. Hosts matching nonProxyHosts are reached without the proxy. -->
+ <proxies>
+ <proxy>
+ <id>dev http</id>
+ <active>true</active>
+ <protocol>http</protocol>
+ <host>web-proxy.oa.com</host>
+ <port>8080</port>
+ <nonProxyHosts>mirrors.tencent.com|qq.com|localhost|127.0.0.1|*.oa.com|repo.maven.apache.org|packages.confluent.io</nonProxyHosts>
+ </proxy>
+ <proxy>
+ <id>dev https</id>
+ <active>true</active>
+ <protocol>https</protocol>
+ <host>web-proxy.oa.com</host>
+ <port>8080</port>
+ <nonProxyHosts>mirrors.tencent.com|qq.com|localhost|127.0.0.1|*.oa.com|repo.maven.apache.org|packages.confluent.io</nonProxyHosts>
+ </proxy>
+ </proxies>
+
+ <offline>false</offline>
+
+ <profiles>
+ <!-- nexus: dependency and plugin repositories served from mirrors.tencent.com.
+ maven_public, tencent_public and thirdparty resolve releases only;
+ mqq and thirdparty-snapshots resolve snapshots only. -->
+ <profile>
+ <id>nexus</id>
+ <repositories>
+ <repository>
+ <id>maven_public</id>
+ <url>https://mirrors.tencent.com/nexus/repository/maven-public/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>tencent_public</id>
+ <url>https://mirrors.tencent.com/repository/maven/tencent_public/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+
+ <repository>
+ <id>thirdparty</id>
+ <url>https://mirrors.tencent.com/repository/maven/thirdparty/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+
+ <repository>
+ <id>mqq</id>
+ <url>https://mirrors.tencent.com/repository/maven/mqq/</url>
+ <releases>
+ <enabled>false</enabled>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ </snapshots>
+ </repository>
+
+ <repository>
+ <id>thirdparty-snapshots</id>
+ <url>https://mirrors.tencent.com/repository/maven/thirdparty-snapshots/</url>
+ <releases>
+ <enabled>false</enabled>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ </snapshots>
+ </repository>
+ </repositories>
+
+ <pluginRepositories>
+ <pluginRepository>
+ <id>maven-public-plugin</id>
+ <url>https://mirrors.tencent.com/nexus/repository/maven-public/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </pluginRepository>
+ <pluginRepository>
+ <id>public-plugin</id>
+ <url>https://mirrors.tencent.com/repository/maven/tencent_public/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </pluginRepository>
+ <pluginRepository>
+ <id>thirdparty-plugin</id>
+ <url>https://mirrors.tencent.com/repository/maven/thirdparty/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </pluginRepository>
+ </pluginRepositories>
+ </profile>
+
+ <!-- tbds: repositories hosted on tbdsrepo.oa.com, with updatePolicy=never on every entry.
+ This profile is not listed under activeProfiles in this file.
+ NOTE(review): the URLs use plain http and most policies set checksumPolicy=ignore;
+ confirm that this is intended before activating the profile. -->
+ <profile>
+ <id>tbds</id>
+ <repositories>
+ <repository>
+ <id>tbds-maven-public</id>
+ <url>http://tbdsrepo.oa.com/repository/maven-public/</url>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>tbds</id>
+ <url>http://tbdsrepo.oa.com/repository/tbds/</url>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ <checksumPolicy>ignore</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ <checksumPolicy>ignore</checksumPolicy>
+ </snapshots>
+ </repository>
+ </repositories>
+ <pluginRepositories>
+ <pluginRepository>
+ <id>tbds</id>
+ <url>http://tbdsrepo.oa.com/repository/tbds/</url>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ <checksumPolicy>ignore</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ <checksumPolicy>ignore</checksumPolicy>
+ </snapshots>
+ </pluginRepository>
+ <pluginRepository>
+ <id>tbds-maven-public</id>
+ <url>http://tbdsrepo.oa.com/repository/maven-public/</url>
+ <releases>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ <checksumPolicy>warn</checksumPolicy>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>never</updatePolicy>
+ <checksumPolicy>ignore</checksumPolicy>
+ </snapshots>
+ </pluginRepository>
+ </pluginRepositories>
+ </profile>
+
+ <!-- confluent_repo: release-only repositories, the CSIG_TWINS mirror on mirrors.tencent.com
+ and packages.confluent.io. -->
+ <profile>
+ <id>confluent_repo</id>
+ <repositories>
+ <repository>
+ <id>tencent-repo</id>
+ <url>https://mirrors.tencent.com/repository/maven/CSIG_TWINS</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>confluent</id>
+ <url>https://packages.confluent.io/maven/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ </repositories>
+ </profile>
+
+ <!-- tianqiong_releases: release-only repository; its id matches a server entry in this file. -->
+ <profile>
+ <id>tianqiong_releases</id>
+ <repositories>
+ <repository>
+ <id>tianqiong-releases</id>
+ <url>https://mirrors.tencent.com/repository/maven/tianqiong-releases</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ </repositories>
+ </profile>
+
+ <!-- tianqiong_snapshots: snapshot-only repository with updatePolicy=always. -->
+ <profile>
+ <id>tianqiong_snapshots</id>
+ <repositories>
+ <repository>
+ <id>tianqiong-snapshots</id>
+ <url>https://mirrors.tencent.com/repository/maven/tianqiong-snapshots</url>
+ <releases>
+ <enabled>false</enabled>
+ </releases>
+ <snapshots>
+ <enabled>true</enabled>
+ <updatePolicy>always</updatePolicy>
+ </snapshots>
+ </repository>
+ </repositories>
+ </profile>
+ </profiles>
+
+ <!-- Profiles that are always active when Maven runs with this settings file.
+ The tbds profile is defined in this file but is not activated here. -->
+ <activeProfiles>
+ <activeProfile>confluent_repo</activeProfile>
+ <activeProfile>tianqiong_releases</activeProfile>
+ <activeProfile>tianqiong_snapshots</activeProfile>
+ <activeProfile>nexus</activeProfile>
+ </activeProfiles>
+ <!-- Credentials for the repository ids used in this file.
+ Passwords are resolved from environment variables so that no secret is stored in
+ version control. Export THIRDPARTY_SNAPSHOTS_PASSWORD, TBDS_PASSWORD and
+ TIANQIONG_PASSWORD before running Maven with this settings file.
+ The passwords that were previously committed here must be treated as leaked and rotated. -->
+ <servers>
+ <server>
+ <id>thirdparty-snapshots</id>
+ <username>ethansu</username>
+ <password>${env.THIRDPARTY_SNAPSHOTS_PASSWORD}</password>
+ </server>
+ <server>
+ <id>tbds</id>
+ <username>tbds</username>
+ <password>${env.TBDS_PASSWORD}</password>
+ </server>
+ <!-- The two tianqiong repositories share one account, hence one variable. -->
+ <server>
+ <id>tianqiong-releases</id>
+ <username>g_datalake</username>
+ <password>${env.TIANQIONG_PASSWORD}</password>
+ </server>
+ <server>
+ <id>tianqiong-snapshots</id>
+ <username>g_datalake</username>
+ <password>${env.TIANQIONG_PASSWORD}</password>
+ </server>
+ </servers>
+</settings>
diff --git a/dev/tencent-install.sh b/dev/tencent-install.sh
new file mode 100644
index 0000000000..1e34f40440
--- /dev/null
+++ b/dev/tencent-install.sh
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e # Exit immediately if a command exits with a non-zero status
+
+# Validate the command line: exactly seven positional arguments are required.
+if [ $# -ne 7 ]; then
+  echo "Usage: $0 <apache-version> <tencent-version> <rc-num> <release-repo-not-snapshot?> <scala_version> <spark_version> <flink_version>"
+  echo "example: $0 0.12.0 1 1 N 2.11 2 1.13"
+  exit 1 # a usage error must not be reported to the caller as success
+fi
+
+# <apache-version>-<tencent-version>-tencent, e.g. 0.10.0-1-tencent
+version="$1-$2-tencent"
+# Only the literal value "N" adds the -SNAPSHOT suffix.
+if [ "$4" = "N" ]; then
+  version="${version}-SNAPSHOT"
+fi
+rc="$3"
+release_repo="$4" # Y for release repo, others for snapshot repo
+
+tag="apache-hudi-${version}"
+tagrc="${tag}-rc${rc}"
+
+echo "Preparing source for $tagrc"
+
+# change version
+echo "Change version for ${version}"
+mvn versions:set -DnewVersion="${version}" -DgenerateBackupPom=false -s dev/settings.xml -U
+mvn versions:commit -s dev/settings.xml -U
+
+# Commit the version bump, (re)create the release-candidate tag, push it and
+# build a source tarball from the tagged commit.
+#
+# Reads the globals release_repo, version, tag and tagrc.
+# Sets the globals set_version_hash, release_hash and tarball, and leaves the
+# working directory inside the extracted ${tag} directory.
+# Arguments: when exactly seven arguments are passed (e.g. git_push "$@"),
+# $5 and $6 are appended to the commit message.
+function git_push() {
+  local suffix=""
+  if [ $# -eq 7 ]; then
+    suffix=" $5 $6"
+  fi
+
+  git add .
+  if [ "${release_repo}" = "Y" ]; then
+    git commit -m "Add version tag for release ${version}${suffix}"
+  else
+    git commit -m "Add snapshot tag ${version}${suffix}"
+  fi
+
+  set_version_hash=$(git rev-list HEAD 2>/dev/null | head -n 1)
+
+  # delete the tag locally and on origin if it already exists
+  git fetch --tags --all
+  tag_exist=$(git tag -l "${tagrc}" | wc -l)
+  if [ ${tag_exist} -gt 0 ]; then
+    git tag -d "${tagrc}"
+    git push origin ":refs/tags/${tagrc}"
+  fi
+
+  # add remote tag
+  # NOTE(review): the old tag is deleted from "origin" above, while the new tag
+  # is pushed to the remote matched below - confirm both are the same remote.
+  git tag -am "Apache Hudi $version" "${tagrc}" ${set_version_hash}
+  remote=$(git remote -v | grep data-lake-technology/hudi.git | head -n 1 | awk '{print $1}')
+  git push "${remote}" "${tagrc}"
+
+  release_hash=$(git rev-list "${tagrc}" 2>/dev/null | head -n 1)
+
+  if [ -z "$release_hash" ]; then
+    # Report the tag that was actually looked up, and fail with a non-zero status.
+    echo "Cannot continue: unknown git tag: $tagrc"
+    exit 1
+  fi
+
+  echo -e "Using commit ${release_hash}\n"
+
+  #echo "git push origin"
+  #git push origin
+
+  echo -e "begin archive ${release_hash}\n"
+  # Quote the variable part so only the trailing * is subject to globbing.
+  rm -rf "${tag}"*
+  tarball="${tag}.tar.gz"
+
+  # be conservative and use the release hash, even though git produces the same
+  # archive (identical hashes) using the scm tag
+  git archive "$release_hash" --worktree-attributes --prefix "$tag/" -o "$tarball"
+
+  # checksum
+  sha512sum "$tarball" >"${tarball}.sha512"
+
+  # extract source tarball
+  tar xzf "${tarball}"
+
+  cd "${tag}"
+  if [ "${release_repo}" = "N" ]; then
+    echo "$version" >version.txt
+  fi
+
+  echo -e "end archive ${release_hash}\n"
+}
+
+# Build the project and install it with Maven for one Scala/Spark/Flink combination.
+# Arguments:
+#   $1  Scala version, used as the -Dscala-<version> property
+#   $2  Spark suffix, used as the -Dspark<suffix> property
+#   $3  Flink suffix, used as the -Dflink<suffix> property
+# Reads the global release_repo to choose the final log message.
+function deploy_spark() {
+  echo -------------------------------------------------------
+  SCALA_VERSION=$1
+  SPARK_VERSION=$2
+  FLINK_VERSION=$3
+
+  # Release and snapshot runs use exactly the same options, so build them once.
+  COMMON_OPTIONS="-Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -Dflink${FLINK_VERSION} -DskipTests -s dev/settings.xml -DretryFailedDeploymentCount=30 -T 2.5C"
+
+  # INSTALL_OPTIONS="-U -Drat.skip=true -Djacoco.skip=true -Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -DskipTests -s dev/settings.xml -T 2.5C"
+  #
+  # echo "INSTALL_OPTIONS: mvn clean package ${INSTALL_OPTIONS}"
+  # mvn clean package ${INSTALL_OPTIONS}
+
+  # Log the command exactly as it is executed on the next line.
+  echo "DEPLOY_OPTIONS: mvn clean package install $COMMON_OPTIONS -Drat.skip=true"
+  # COMMON_OPTIONS is intentionally unquoted: it must split into separate arguments.
+  mvn clean package install $COMMON_OPTIONS -Drat.skip=true
+
+  # NOTE(review): the command above runs the "install" goal, not "deploy";
+  # confirm that the "Published to ..." wording below is still appropriate.
+  if [ "${release_repo}" = "Y" ]; then
+    echo -e "Published to release repo\n"
+  else
+    echo -e "Published to snapshot repo\n"
+  fi
+  echo -------------------------------------------------------
+}
+
+echo "SCALA_VERSION: $5 SPARK_VERSION: $6"
+# Quote the arguments so that each one reaches deploy_spark as a single word.
+deploy_spark "$5" "$6" "$7"
+
+## spark 2.4.6
+#deploy_spark 2.11 2
+## spark 3.0.1
+#deploy_spark 2.12 3.0.x
+## spark 3.1.2
+#deploy_spark 2.12 3
+
+# clean
+#rm -rf ../${tag}*
+
+echo "Success! The release candidate [${tagrc}] is available"
+# release_hash is only assigned inside git_push, which this script never calls;
+# fall back to the current HEAD so the line does not print an empty hash.
+echo "Commit SHA1: ${release_hash:-$(git rev-parse HEAD)}"
diff --git a/dev/tencent-release.sh b/dev/tencent-release.sh
new file mode 100644
index 0000000000..944f497070
--- /dev/null
+++ b/dev/tencent-release.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e # Exit immediately if a command exits with a non-zero status
+
+# Validate the command line: exactly seven positional arguments are required.
+if [ $# -ne 7 ]; then
+  echo "Usage: $0 <apache-version> <tencent-version> <rc-num> <release-repo-not-snapshot?> <scala_version> <spark_version> <flink_version>"
+  echo "example: $0 0.12.0 1 1 N 2.11 2 1.13"
+  exit 1 # a usage error must not be reported to the caller as success
+fi
+
+# <apache-version>-<tencent-version>-tencent, e.g. 0.10.0-1-tencent
+version="$1-$2-tencent"
+# Only the literal value "N" adds the -SNAPSHOT suffix.
+if [ "$4" = "N" ]; then
+  version="${version}-SNAPSHOT"
+fi
+rc="$3"
+release_repo="$4" # Y for release repo, others for snapshot repo
+
+tag="apache-hudi-${version}"
+tagrc="${tag}-rc${rc}"
+
+echo "Preparing source for $tagrc"
+
+# change version
+echo "Change version for ${version}"
+mvn versions:set -DnewVersion="${version}" -DgenerateBackupPom=false -s dev/settings.xml -U
+mvn versions:commit -s dev/settings.xml -U
+
+# Commit the version bump. The argument check above guarantees that $5 (Scala)
+# and $6 (Spark) are present, so they are always part of the commit message.
+git add .
+if [ "${release_repo}" = "Y" ]; then
+  git commit -m "Add version tag for release ${version} $5 $6"
+else
+  git commit -m "Add snapshot tag ${version} $5 $6"
+fi
+
+set_version_hash=$(git rev-list HEAD 2>/dev/null | head -n 1)
+
+# delete the tag locally and on origin if it already exists
+git fetch --tags --all
+tag_exist=$(git tag -l "${tagrc}" | wc -l)
+if [ ${tag_exist} -gt 0 ]; then
+  git tag -d "${tagrc}"
+  git push origin ":refs/tags/${tagrc}"
+fi
+
+# add remote tag
+# NOTE(review): the old tag is deleted from "origin" above, while the new tag
+# is pushed to the remote matched below - confirm both are the same remote.
+git tag -am "Apache Hudi $version" "${tagrc}" ${set_version_hash}
+remote=$(git remote -v | grep data-lake-technology/hudi.git | head -n 1 | awk '{print $1}')
+git push "${remote}" "${tagrc}"
+
+release_hash=$(git rev-list "${tagrc}" 2>/dev/null | head -n 1)
+
+if [ -z "$release_hash" ]; then
+  # Report the tag that was actually looked up, and fail with a non-zero status.
+  echo "Cannot continue: unknown git tag: $tagrc"
+  exit 1
+fi
+
+echo -e "Using commit ${release_hash}\n"
+
+#echo "git push origin"
+#git push origin
+
+echo -e "begin archive ${release_hash}\n"
+# Quote the variable part so only the trailing * is subject to globbing.
+rm -rf "${tag}"*
+tarball="${tag}.tar.gz"
+
+# be conservative and use the release hash, even though git produces the same
+# archive (identical hashes) using the scm tag
+git archive "$release_hash" --worktree-attributes --prefix "$tag/" -o "$tarball"
+
+# checksum
+sha512sum "$tarball" > "${tarball}.sha512"
+
+# extract source tarball
+tar xzf "${tarball}"
+
+# From here on the script runs inside the extracted source tree.
+cd "${tag}"
+if [ "${release_repo}" = "N" ]; then
+  echo "$version" > version.txt
+fi
+
+echo -e "end archive ${release_hash}\n"
+
+# Deploy the project with Maven for one Scala/Spark/Flink combination.
+# Arguments:
+#   $1  Scala version, used as the -Dscala-<version> property
+#   $2  Spark suffix, used as the -Dspark<suffix> property
+#   $3  Flink suffix, used as the -Dflink<suffix> property
+# Reads the global release_repo to choose the final log message.
+function deploy_spark(){
+  echo -------------------------------------------------------
+  SCALA_VERSION=$1
+  SPARK_VERSION=$2
+  FLINK_VERSION=$3
+
+  # Release and snapshot runs use exactly the same options, so build them once.
+  COMMON_OPTIONS="-Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -Dflink${FLINK_VERSION} -DskipTests -s dev/settings.xml -DretryFailedDeploymentCount=30"
+
+#  INSTALL_OPTIONS="-U -Drat.skip=true -Djacoco.skip=true -Dscala-${SCALA_VERSION} -Dspark${SPARK_VERSION} -DskipTests -s dev/settings.xml -T 2.5C"
+#
+#  echo "INSTALL_OPTIONS: mvn clean package ${INSTALL_OPTIONS}"
+#  mvn clean package ${INSTALL_OPTIONS}
+
+  # Log the command exactly as it is executed on the next line (no "clean" goal).
+  echo "DEPLOY_OPTIONS: mvn deploy $COMMON_OPTIONS"
+  # COMMON_OPTIONS is intentionally unquoted: it must split into separate arguments.
+  mvn deploy $COMMON_OPTIONS
+
+  if [ "${release_repo}" = "Y" ]; then
+    echo -e "Published to release repo\n"
+  else
+    echo -e "Published to snapshot repo\n"
+  fi
+  echo -------------------------------------------------------
+}
+
+echo "SCALA_VERSION: $5 SPARK_VERSION: $6"
+# Quote the arguments so that each one reaches deploy_spark as a single word.
+deploy_spark "$5" "$6" "$7"
+
+## spark 2.4.6
+#deploy_spark 2.11 2
+## spark 3.0.1
+#deploy_spark 2.12 3.0.x
+## spark 3.1.2
+#deploy_spark 2.12 3
+
+# clean: remove the tarball, its checksum and the extracted tree from the parent directory.
+# The variable part is quoted so only the trailing * is subject to globbing.
+rm -rf "../${tag}"*
+
+echo "Success! The release candidate [${tagrc}] is available"
+echo "Commit SHA1: ${release_hash}"
diff --git a/hudi-cli/pom.xml b/hudi-cli/pom.xml
index ee78bf24b0..27596e779f 100644
--- a/hudi-cli/pom.xml
+++ b/hudi-cli/pom.xml
@@ -250,11 +250,11 @@
<!-- Spark -->
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
diff --git a/hudi-client/hudi-spark-client/pom.xml b/hudi-client/hudi-spark-client/pom.xml
index a7ae3a7049..da1ad6cb9f 100644
--- a/hudi-client/hudi-spark-client/pom.xml
+++ b/hudi-client/hudi-spark-client/pom.xml
@@ -57,11 +57,11 @@
<!-- Spark -->
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
diff --git a/hudi-examples/hudi-examples-spark/pom.xml b/hudi-examples/hudi-examples-spark/pom.xml
index 4eeb11ecb1..d0611c6752 100644
--- a/hudi-examples/hudi-examples-spark/pom.xml
+++ b/hudi-examples/hudi-examples-spark/pom.xml
@@ -189,11 +189,11 @@
<!-- Spark -->
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
diff --git a/hudi-integ-test/pom.xml b/hudi-integ-test/pom.xml
index 2134f80bb0..703cbb067f 100644
--- a/hudi-integ-test/pom.xml
+++ b/hudi-integ-test/pom.xml
@@ -62,7 +62,7 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
@@ -89,7 +89,7 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-avro_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>test</scope>
diff --git a/hudi-spark-datasource/hudi-spark-common/pom.xml b/hudi-spark-datasource/hudi-spark-common/pom.xml
index a1016299ba..6fd1d7d458 100644
--- a/hudi-spark-datasource/hudi-spark-common/pom.xml
+++ b/hudi-spark-datasource/hudi-spark-common/pom.xml
@@ -184,7 +184,7 @@
<!-- Spark -->
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
@@ -194,29 +194,29 @@
</exclusions>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
diff --git a/hudi-spark-datasource/hudi-spark/pom.xml b/hudi-spark-datasource/hudi-spark/pom.xml
index f55cb3359c..f4ad09bb57 100644
--- a/hudi-spark-datasource/hudi-spark/pom.xml
+++ b/hudi-spark-datasource/hudi-spark/pom.xml
@@ -245,7 +245,7 @@
<!-- Spark -->
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
@@ -255,31 +255,31 @@
</exclusions>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<scope>test</scope>
diff --git a/hudi-spark-datasource/hudi-spark2/pom.xml b/hudi-spark-datasource/hudi-spark2/pom.xml
index f74dd96a5b..63cc6f3a4f 100644
--- a/hudi-spark-datasource/hudi-spark2/pom.xml
+++ b/hudi-spark-datasource/hudi-spark2/pom.xml
@@ -21,10 +21,10 @@
</parent>
<modelVersion>4.0.0</modelVersion>
- <artifactId>hudi-spark2_${scala.binary.version}</artifactId>
+ <artifactId>hudi-spark2_2.11</artifactId>
<version>0.12.1</version>
- <name>hudi-spark2_${scala.binary.version}</name>
+ <name>hudi-spark2_2.11</name>
<packaging>jar</packaging>
<properties>
@@ -185,13 +185,13 @@
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
- <artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
+ <artifactId>hudi-spark-common_2.11</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
- <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <groupId>${spark.groupId}</groupId>
+ <artifactId>spark-sql_2.11</artifactId>
<version>${spark2.version}</version>
<scope>provided</scope>
<optional>true</optional>
@@ -230,7 +230,7 @@
</dependency>
<dependency>
<groupId>org.apache.hudi</groupId>
- <artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
+ <artifactId>hudi-spark-common_2.11</artifactId>
<version>${project.version}</version>
<classifier>tests</classifier>
<type>test-jar</type>
diff --git a/hudi-spark-datasource/hudi-spark3-common/pom.xml b/hudi-spark-datasource/hudi-spark3-common/pom.xml
index 75957d6d4c..6bbb4e42b4 100644
--- a/hudi-spark-datasource/hudi-spark3-common/pom.xml
+++ b/hudi-spark-datasource/hudi-spark3-common/pom.xml
@@ -160,7 +160,7 @@
<dependencies>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>${spark3.version}</version>
<scope>provided</scope>
diff --git a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml
index 6768e0ce03..fb43cd2855 100644
--- a/hudi-spark-datasource/hudi-spark3.1.x/pom.xml
+++ b/hudi-spark-datasource/hudi-spark3.1.x/pom.xml
@@ -151,7 +151,7 @@
<dependencies>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>${spark31.version}</version>
<optional>true</optional>
diff --git a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml
index cd6ba3a4b5..51f986e069 100644
--- a/hudi-spark-datasource/hudi-spark3.2.x/pom.xml
+++ b/hudi-spark-datasource/hudi-spark3.2.x/pom.xml
@@ -174,7 +174,7 @@
<dependencies>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>${spark32.version}</version>
<scope>provided</scope>
@@ -182,7 +182,7 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-catalyst_2.12</artifactId>
<version>${spark32.version}</version>
<scope>provided</scope>
@@ -190,7 +190,7 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>${spark32.version}</version>
<scope>provided</scope>
diff --git a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml
index 9ab65dca2e..65ce18d2d3 100644
--- a/hudi-spark-datasource/hudi-spark3.3.x/pom.xml
+++ b/hudi-spark-datasource/hudi-spark3.3.x/pom.xml
@@ -174,7 +174,7 @@
<dependencies>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_2.12</artifactId>
<version>${spark33.version}</version>
<scope>provided</scope>
@@ -182,7 +182,7 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-catalyst_2.12</artifactId>
<version>${spark33.version}</version>
<scope>provided</scope>
@@ -190,7 +190,7 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_2.12</artifactId>
<version>${spark33.version}</version>
<scope>provided</scope>
diff --git a/hudi-sync/hudi-hive-sync/pom.xml b/hudi-sync/hudi-hive-sync/pom.xml
index 7cf31550b6..9785d71c9e 100644
--- a/hudi-sync/hudi-hive-sync/pom.xml
+++ b/hudi-sync/hudi-hive-sync/pom.xml
@@ -139,13 +139,13 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<scope>test</scope>
</dependency>
diff --git a/hudi-utilities/pom.xml b/hudi-utilities/pom.xml
index 0c2a612d78..93cb94b320 100644
--- a/hudi-utilities/pom.xml
+++ b/hudi-utilities/pom.xml
@@ -184,7 +184,7 @@
<!-- Spark -->
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
@@ -199,7 +199,7 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<exclusions>
<exclusion>
@@ -210,17 +210,17 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<classifier>tests</classifier>
diff --git a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
index 523546c9ef..4a38da6528 100644
--- a/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
+++ b/hudi-utilities/src/main/java/org/apache/hudi/utilities/UtilHelpers.java
@@ -83,6 +83,7 @@ import org.apache.spark.util.LongAccumulator;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
+import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.sql.Connection;
import java.sql.Driver;
@@ -421,7 +422,7 @@ public class UtilHelpers {
statement.setQueryTimeout(Integer.parseInt(options.get(JDBCOptions.JDBC_QUERY_TIMEOUT())));
statement.executeQuery();
} catch (SQLException e) {
- throw new HoodieException(e);
+ return false;
}
return true;
}
@@ -445,12 +446,23 @@ public class UtilHelpers {
statement.setQueryTimeout(Integer.parseInt(options.get("queryTimeout")));
try (ResultSet rs = statement.executeQuery()) {
StructType structType;
+ Object[] methodParas;
+ Method method = getMethodByName(JdbcUtils.class, "getSchema");
+ int parasCount = getMethodParasCount(method);
+
if (Boolean.parseBoolean(options.get("nullable"))) {
- structType = JdbcUtils.getSchema(rs, dialect, true);
+ methodParas = parasCount == 3 ? new Object[] {rs, dialect, true} : new Object[] {method, rs, dialect, url, true};
} else {
- structType = JdbcUtils.getSchema(rs, dialect, false);
+ methodParas = parasCount == 3 ? new Object[] {rs, dialect, false} : new Object[] {method, rs, dialect, url, false};
+ }
+
+ structType = getStructTypeReflection(method, methodParas);
+
+ if (structType != null) {
+ return AvroConversionUtils.convertStructTypeToAvroSchema(structType, table, "hoodie." + table);
+ } else {
+ throw new HoodieException(String.format("%s structType can not null!", table));
}
- return AvroConversionUtils.convertStructTypeToAvroSchema(structType, table, "hoodie." + table);
}
}
} else {
@@ -572,4 +584,22 @@ public class UtilHelpers {
Schema schema = schemaResolver.getTableAvroSchema(false);
return schema.toString();
}
+
+  /**
+   * Looks up a method declared directly on {@code clazz} by name, ignoring case.
+   *
+   * <p>Overloads are not distinguished: the first match in the order returned by
+   * {@link Class#getDeclaredMethods()} wins.
+   *
+   * @param clazz      class whose declared methods are searched
+   * @param methodName method name, compared case-insensitively
+   * @return the first matching method, or {@code null} when none is declared
+   */
+  public static Method getMethodByName(Class<?> clazz, String methodName) {
+    return Arrays.stream(clazz.getDeclaredMethods())
+        .filter(m -> m.getName().equalsIgnoreCase(methodName))
+        .findFirst()
+        .orElse(null);
+  }
+
+  /**
+   * Returns the number of formal parameters of {@code method}.
+   *
+   * @param method the method to inspect; may be {@code null}, which is what
+   *               {@code getMethodByName} returns when nothing matches
+   * @return the parameter count, or {@code -1} when {@code method} is {@code null}
+   */
+  public static int getMethodParasCount(Method method) {
+    // Tolerate a failed lookup instead of throwing a NullPointerException, in line
+    // with getStructTypeReflection, which also accepts a null method.
+    if (method == null) {
+      return -1;
+    }
+    return method.getParameterCount();
+  }
+
+  /**
+   * Invokes {@code method} reflectively without a receiver and casts the result
+   * to {@link StructType}.
+   *
+   * @param method the method to invoke with a {@code null} receiver; may be {@code null}
+   * @param objs   arguments passed to the method
+   * @return the value returned by the method, or {@code null} when {@code method} is {@code null}
+   * @throws Exception whatever {@link Method#invoke(Object, Object...)} throws
+   */
+  public static StructType getStructTypeReflection(Method method, Object... objs) throws Exception {
+    if (method == null) {
+      return null;
+    }
+    return (StructType) method.invoke(null, objs);
+  }
}
diff --git a/packaging/hudi-integ-test-bundle/pom.xml b/packaging/hudi-integ-test-bundle/pom.xml
index d1789b863a..8323703622 100644
--- a/packaging/hudi-integ-test-bundle/pom.xml
+++ b/packaging/hudi-integ-test-bundle/pom.xml
@@ -646,12 +646,12 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
</dependency>
@@ -662,14 +662,14 @@
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-streaming_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-streaming-kafka-0-10_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
</dependency>
diff --git a/pom.xml b/pom.xml
index 47e53fed97..159ae2a841 100644
--- a/pom.xml
+++ b/pom.xml
@@ -125,6 +125,7 @@
<spark2.version>2.4.4</spark2.version>
<spark3.version>3.3.0</spark3.version>
<sparkbundle.version></sparkbundle.version>
+ <spark.groupId>com.tencent.spark</spark.groupId>
<flink1.15.version>1.15.1</flink1.15.version>
<flink1.14.version>1.14.5</flink1.14.version>
<flink1.13.version>1.13.6</flink1.13.version>
@@ -142,7 +143,7 @@
<flink.clients.artifactId>flink-clients</flink.clients.artifactId>
<flink.connector.kafka.artifactId>flink-connector-kafka</flink.connector.kafka.artifactId>
<flink.hadoop.compatibility.artifactId>flink-hadoop-compatibility_2.12</flink.hadoop.compatibility.artifactId>
- <spark31.version>3.1.3</spark31.version>
+ <spark31.version>3.1.2</spark31.version>
<spark32.version>3.2.2</spark32.version>
<spark33.version>3.3.0</spark33.version>
<hudi.spark.module>hudi-spark2</hudi.spark.module>
@@ -724,7 +725,7 @@
<!-- Spark -->
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
@@ -740,26 +741,26 @@
</exclusions>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-hive_${scala.binary.version}</artifactId>
<version>${spark.version}</version>
<scope>provided</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-sql_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<version>${spark.version}</version>
<scope>test</scope>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-core_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<version>${spark.version}</version>
@@ -776,7 +777,7 @@
</exclusions>
</dependency>
<dependency>
- <groupId>org.apache.spark</groupId>
+ <groupId>${spark.groupId}</groupId>
<artifactId>spark-catalyst_${scala.binary.version}</artifactId>
<classifier>tests</classifier>
<version>${spark.version}</version>
@@ -1472,33 +1473,6 @@
</dependency>
</dependencies>
</dependencyManagement>
- <repositories>
- <repository>
- <id>Maven Central</id>
- <name>Maven Repository</name>
- <url>https://repo.maven.apache.org/maven2</url>
- <releases>
- <enabled>true</enabled>
- </releases>
- <snapshots>
- <enabled>false</enabled>
- </snapshots>
- </repository>
- <repository>
- <id>cloudera-repo-releases</id>
- <url>https://repository.cloudera.com/artifactory/public/</url>
- <releases>
- <enabled>true</enabled>
- </releases>
- <snapshots>
- <enabled>false</enabled>
- </snapshots>
- </repository>
- <repository>
- <id>confluent</id>
- <url>https://packages.confluent.io/maven/</url>
- </repository>
- </repositories>
<profiles>
<profile>
@@ -1985,7 +1959,7 @@
<profile>
<id>spark3.1</id>
<properties>
- <spark3.version>3.1.3</spark3.version>
+ <spark3.version>3.1.2</spark3.version>
<spark.version>${spark3.version}</spark.version>
<sparkbundle.version>3.1</sparkbundle.version>
<scala.version>${scala12.version}</scala.version>
@@ -2137,6 +2111,8 @@
<flink.clients.artifactId>flink-clients_${scala.binary.version}</flink.clients.artifactId>
<flink.connector.kafka.artifactId>flink-connector-kafka_${scala.binary.version}</flink.connector.kafka.artifactId>
<flink.hadoop.compatibility.artifactId>flink-hadoop-compatibility_${scala.binary.version}</flink.hadoop.compatibility.artifactId>
+ <hudi.flink.module>hudi-flink1.13.x</hudi.flink.module>
+ <flink.bundle.version>1.13</flink.bundle.version>
<skipITs>true</skipITs>
</properties>
<activation>
@@ -2157,6 +2133,54 @@
</property>
</activation>
</profile>
+
+ <!--
+   Opt-in profile for building against upstream Apache Spark artifacts: it sets
+   spark.groupId to org.apache.spark (the properties section of this POM defaults
+   it to com.tencent.spark) and declares the public repositories listed below
+   (Maven Central, Cloudera, Confluent).
+   The profile has no activation block, so it only applies when selected
+   explicitly, e.g. with -Pcommunity on the mvn command line.
+ -->
+ <profile>
+ <id>community</id>
+ <properties>
+ <spark.groupId>org.apache.spark</spark.groupId>
+ </properties>
+ <repositories>
+ <repository>
+ <id>Maven Central</id>
+ <name>Maven Repository</name>
+ <url>https://repo.maven.apache.org/maven2</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>cloudera-repo-releases</id>
+ <url>https://repository.cloudera.com/artifactory/public/</url>
+ <releases>
+ <enabled>true</enabled>
+ </releases>
+ <snapshots>
+ <enabled>false</enabled>
+ </snapshots>
+ </repository>
+ <repository>
+ <id>confluent</id>
+ <url>https://packages.confluent.io/maven/</url>
+ </repository>
+ </repositories>
+ </profile>
</profiles>
+ <!--
+   Deployment targets for this build: release artifacts are published to the
+   tianqiong-releases repository and snapshot artifacts to tianqiong-snapshots,
+   both hosted on mirrors.tencent.com.
+   NOTE(review): credentials are presumably resolved through server entries in
+   settings.xml whose ids match the repository ids below; confirm.
+ -->
+ <distributionManagement>
+ <repository>
+ <id>tianqiong-releases</id>
+ <name>Tianqiong Release Repository</name>
+ <url>https://mirrors.tencent.com/repository/maven/tianqiong-releases</url>
+ </repository>
+
+ <snapshotRepository>
+ <id>tianqiong-snapshots</id>
+ <name>Tianqiong Snapshots Repository</name>
+ <url>https://mirrors.tencent.com/repository/maven/tianqiong-snapshots</url>
+ </snapshotRepository>
+ </distributionManagement>
+
</project>