You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by yi...@apache.org on 2023/01/30 04:56:39 UTC

[hudi] 06/19: [HUDI-5630] Fixing flaky parquet projection tests (#7768)

This is an automated email from the ASF dual-hosted git repository.

yihua pushed a commit to branch release-0.13.0
in repository https://gitbox.apache.org/repos/asf/hudi.git

commit 01acf99e27967cec22199e2718eff30fb2a6a80c
Author: Sivabalan Narayanan <n....@gmail.com>
AuthorDate: Fri Jan 27 22:09:46 2023 -0800

    [HUDI-5630] Fixing flaky parquet projection tests (#7768)
    
    Fixing flaky parquet projection tests. Added a 10% tolerance margin around the expected number of bytes read with column projection.
---
 .../hudi/functional/TestParquetColumnProjection.scala   | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala
index 214b7256417..66d63d4a871 100644
--- a/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala
+++ b/hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestParquetColumnProjection.scala
@@ -18,6 +18,7 @@
 package org.apache.hudi.functional
 
 import org.apache.avro.Schema
+import org.apache.calcite.runtime.SqlFunctions.abs
 import org.apache.hudi.HoodieBaseRelation.projectSchema
 import org.apache.hudi.common.config.{HoodieMetadataConfig, HoodieStorageConfig}
 import org.apache.hudi.common.model.{HoodieRecord, OverwriteNonDefaultsWithLatestAvroPayload}
@@ -31,8 +32,9 @@ import org.apache.parquet.hadoop.util.counters.BenchmarkCounter
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.{Dataset, HoodieUnsafeUtils, Row, SaveMode}
-import org.junit.jupiter.api.Assertions.{assertEquals, fail}
+import org.junit.jupiter.api.Assertions.{assertEquals, assertTrue, fail}
 import org.junit.jupiter.api.{Disabled, Tag, Test}
+
 import scala.collection.JavaConverters._
 
 @Tag("functional")
@@ -235,9 +237,9 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with
     else if (HoodieSparkUtils.isSpark2)
     // TODO re-enable tests (these tests are very unstable currently)
       Array(
-        ("rider", -1),
-        ("rider,driver", -1),
-        ("rider,driver,tip_history", -1))
+        ("rider", 14160),
+        ("rider,driver", 14160),
+        ("rider,driver,tip_history", 14160))
     else
       fail("Only Spark 3 and Spark 2 are currently supported")
 
@@ -326,11 +328,8 @@ class TestParquetColumnProjection extends SparkClientFunctionalTestHarness with
         else targetRecordCount
 
       assertEquals(expectedRecordCount, rows.length)
-      if (expectedBytesRead != -1) {
-        assertEquals(expectedBytesRead, bytesRead)
-      } else {
-        logWarning(s"Not matching bytes read ($bytesRead)")
-      }
+      // Verify that bytesRead is within a 10% margin of expectedBytesRead.
+      assertTrue((abs(expectedBytesRead - bytesRead) / expectedBytesRead) < 0.1)
 
       val readColumns = targetColumns ++ relation.mandatoryFields
       val (_, projectedStructType, _) = projectSchema(Left(tableState.schema), readColumns)