Posted to commits@hudi.apache.org by si...@apache.org on 2023/03/27 16:45:01 UTC

[hudi] branch master updated: [HUDI-5984] Enable FT for spark3.x versions in CI (#8293)

This is an automated email from the ASF dual-hosted git repository.

sivabalan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new cd4f7f551b8 [HUDI-5984] Enable FT for spark3.x versions in CI (#8293)
cd4f7f551b8 is described below

commit cd4f7f551b8bddfd80adc5c603e2e14e015c3b63
Author: Shiyan Xu <27...@users.noreply.github.com>
AuthorDate: Mon Mar 27 11:44:52 2023 -0500

    [HUDI-5984] Enable FT for spark3.x versions in CI (#8293)
    
    - Enabling FT coverage in GH actions CI for spark3.x
---
 .github/workflows/bot.yml                          | 17 +++++++++-
 .../functional/TestHiveTableSchemaEvolution.java   | 37 +++++++++++-----------
 .../hudi/functional/TestColumnStatsIndex.scala     |  3 ++
 3 files changed, 38 insertions(+), 19 deletions(-)
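For reference, after the matrix variables are substituted, the new "FT - Spark" step added in .github/workflows/bot.yml below amounts to roughly the following Maven invocation (a sketch for the scala-2.12 / spark3.2 matrix entry; $MVN_ARGS stands for the flags defined in the workflow's env block):

    mvn test -Pfunctional-tests -D"scala-2.12" -D"spark3.2" \
      -pl "hudi-spark-datasource/hudi-spark,hudi-spark-datasource/hudi-spark-common,hudi-spark-datasource/hudi-spark3.2.x" \
      $MVN_ARGS

The other non-2.4 matrix entries differ only in the Spark profile and the version-specific module passed via -pl.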

diff --git a/.github/workflows/bot.yml b/.github/workflows/bot.yml
index 31857bb0b20..f5d6d0e5461 100644
--- a/.github/workflows/bot.yml
+++ b/.github/workflows/bot.yml
@@ -23,6 +23,7 @@ on:
       - 'release-*'
 env:
   MVN_ARGS: -e -ntp -B -V -Pwarn-log -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.shade=warn -Dorg.slf4j.simpleLogger.log.org.apache.maven.plugins.dependency=warn
+  SPARK_COMMON_MODULES: hudi-spark-datasource/hudi-spark,hudi-spark-datasource/hudi-spark-common
 
 jobs:
   validate-source:
@@ -52,18 +53,23 @@ jobs:
         include:
           - scalaProfile: "scala-2.11"
             sparkProfile: "spark2.4"
+            sparkModules: "hudi-spark-datasource/hudi-spark2"
 
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark2.4"
+            sparkModules: "hudi-spark-datasource/hudi-spark2"
 
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.1"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.1.x"
 
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.2"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.2.x"
 
           - scalaProfile: "scala-2.12"
             sparkProfile: "spark3.3"
+            sparkModules: "hudi-spark-datasource/hudi-spark3.3.x"
 
     steps:
       - uses: actions/checkout@v2
@@ -89,9 +95,18 @@ jobs:
         env:
           SCALA_PROFILE: ${{ matrix.scalaProfile }}
           SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
         if: ${{ !endsWith(env.SPARK_PROFILE, '2.4') }} # skip test spark 2.4 as it's covered by Azure CI
         run:
-          mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl hudi-common,hudi-spark-datasource/hudi-spark $MVN_ARGS
+          mvn test -Punit-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "hudi-common,$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
+      - name: FT - Spark
+        env:
+          SCALA_PROFILE: ${{ matrix.scalaProfile }}
+          SPARK_PROFILE: ${{ matrix.sparkProfile }}
+          SPARK_MODULES: ${{ matrix.sparkModules }}
+        if: ${{ !endsWith(env.SPARK_PROFILE, '2.4') }} # skip test spark 2.4 as it's covered by Azure CI
+        run:
+          mvn test -Pfunctional-tests -D"$SCALA_PROFILE" -D"$SPARK_PROFILE" -pl "$SPARK_COMMON_MODULES,$SPARK_MODULES" $MVN_ARGS
 
   test-flink:
     runs-on: ubuntu-latest
diff --git a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java
index 5fca2b8bffe..027224dbe60 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java
+++ b/hudi-spark-datasource/hudi-spark/src/test/java/org/apache/hudi/functional/TestHiveTableSchemaEvolution.java
@@ -18,14 +18,6 @@
 
 package org.apache.hudi.functional;
 
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
-import org.apache.hadoop.hive.serde.serdeConstants;
-import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.InputSplit;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hudi.HoodieSparkUtils;
 import org.apache.hudi.common.fs.FSUtils;
 import org.apache.hudi.hadoop.HoodieParquetInputFormat;
@@ -34,25 +26,36 @@ import org.apache.hudi.hadoop.realtime.HoodieEmptyRecordReader;
 import org.apache.hudi.hadoop.realtime.HoodieRealtimeRecordReader;
 import org.apache.hudi.hadoop.realtime.RealtimeCompactedRecordReader;
 import org.apache.hudi.hadoop.realtime.RealtimeSplit;
+
+import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
 import org.apache.spark.SparkConf;
 import org.apache.spark.sql.SparkSession;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Tag;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.util.Date;
 
 import static org.apache.hudi.testutils.HoodieClientTestUtils.getSparkConfForTest;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
-import com.uber.hoodie.hadoop.realtime.HoodieRealtimeInputFormat;
-
-import java.io.File;
-import java.util.Date;
-
 @Tag("functional")
 public class TestHiveTableSchemaEvolution {
 
   private SparkSession sparkSession = null;
 
+  @TempDir
+  java.nio.file.Path basePath;
+
   @BeforeEach
   public void setUp() {
     initSparkContexts("HiveSchemaEvolution");
@@ -74,10 +77,9 @@ public class TestHiveTableSchemaEvolution {
   @Test
   public void testCopyOnWriteTableForHive() throws Exception {
     String tableName = "huditest" + new Date().getTime();
-    File file = new File(System.getProperty("java.io.tmpdir") + tableName);
     if (HoodieSparkUtils.gteqSpark3_1()) {
       sparkSession.sql("set hoodie.schema.on.read.enable=true");
-      String path = new Path(file.getCanonicalPath()).toUri().toString();
+      String path = new Path(basePath.toAbsolutePath().toString()).toUri().toString();
       sparkSession.sql("create table " + tableName + "(col0 int, col1 float, col2 string) using hudi options(type='cow', primaryKey='col0', preCombineField='col1') location '" + path + "'");
       sparkSession.sql("insert into " + tableName + " values(1, 1.1, 'text')");
       sparkSession.sql("alter table " + tableName + " alter column col1 type double");
@@ -95,10 +97,9 @@ public class TestHiveTableSchemaEvolution {
   @Test
   public void testMergeOnReadTableForHive() throws Exception {
     String tableName = "huditest" + new Date().getTime();
-    File file = new File(System.getProperty("java.io.tmpdir") + tableName);
     if (HoodieSparkUtils.gteqSpark3_1()) {
       sparkSession.sql("set hoodie.schema.on.read.enable=true");
-      String path = new Path(file.getCanonicalPath()).toUri().toString();
+      String path = new Path(basePath.toAbsolutePath().toString()).toUri().toString();
       sparkSession.sql("create table " + tableName + "(col0 int, col1 float, col2 string) using hudi options(type='cow', primaryKey='col0', preCombineField='col1') location '" + path + "'");
       sparkSession.sql("insert into " + tableName + " values(1, 1.1, 'text')");
       sparkSession.sql("insert into " + tableName + " values(2, 1.2, 'text2')");
@@ -146,4 +147,4 @@ public class TestHiveTableSchemaEvolution {
         + "_hoodie_record_key,_hoodie_partition_path,_hoodie_file_name,col0,col1,col2");
     assertEquals(jobConf.get(serdeConstants.LIST_COLUMN_TYPES), "string,string,string,string,string,int,double,string");
   }
-}
\ No newline at end of file
+}
diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala
index 5eb78529080..c6dd7ac6170 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestColumnStatsIndex.scala
@@ -39,6 +39,7 @@ import org.apache.spark.sql.hudi.DataSkippingUtils.translateIntoColumnStatsIndex
 import org.apache.spark.sql.types._
 import org.junit.jupiter.api.Assertions.{assertEquals, assertNotNull, assertTrue}
 import org.junit.jupiter.api._
+import org.junit.jupiter.api.condition.DisabledIf
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.{Arguments, EnumSource, MethodSource, ValueSource}
 
@@ -48,6 +49,8 @@ import scala.collection.JavaConverters._
 import scala.util.Random
 
 @Tag("functional")
+@DisabledIf(value = "org.apache.hudi.HoodieSparkUtils#gteqSpark3_3",
+  disabledReason = "Jackson version conflicts (HUDI-5352)")
 class TestColumnStatsIndex extends HoodieSparkClientTestBase {
   var spark: SparkSession = _