You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by ka...@apache.org on 2021/02/03 23:46:50 UTC
[spark] branch master updated: [SPARK-34326][CORE][SQL] Fix UTs
added in SPARK-31793 depending on the length of temp path
This is an automated email from the ASF dual-hosted git repository.
kabhwan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 44dcf00 [SPARK-34326][CORE][SQL] Fix UTs added in SPARK-31793 depending on the length of temp path
44dcf00 is described below
commit 44dcf0062c41ff4230096bee800d9b4f70c424ce
Author: Jungtaek Lim (HeartSaVioR) <ka...@gmail.com>
AuthorDate: Thu Feb 4 08:46:11 2021 +0900
[SPARK-34326][CORE][SQL] Fix UTs added in SPARK-31793 depending on the length of temp path
### What changes were proposed in this pull request?
This PR proposes to fix the UTs being added in SPARK-31793, so that all things contributing to the length limit are properly accounted for.
### Why are the changes needed?
The test `DataSourceScanExecRedactionSuite.SPARK-31793: FileSourceScanExec metadata should contain limited file paths` is failing conditionally, depending on the length of the temp directory.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
The modified UTs explain the previously missing points and also serve as the tests for this change.
Closes #31449 from HeartSaVioR/SPARK-34326-v2.
Authored-by: Jungtaek Lim (HeartSaVioR) <ka...@gmail.com>
Signed-off-by: Jungtaek Lim <ka...@gmail.com>
---
.../scala/org/apache/spark/util/UtilsSuite.scala | 6 +++++
.../DataSourceScanExecRedactionSuite.scala | 26 +++++++++++++++++-----
2 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 8fb4080..18ff960 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -1308,6 +1308,12 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
assert(Utils.buildLocationMetadata(paths, 10) == "[path0, path1]")
assert(Utils.buildLocationMetadata(paths, 15) == "[path0, path1, path2]")
assert(Utils.buildLocationMetadata(paths, 25) == "[path0, path1, path2, path3]")
+
+ // edge-case: we should consider the fact non-path chars including '[' and ", " are accounted
+ // 1. second path is not added due to the addition of '['
+ assert(Utils.buildLocationMetadata(paths, 6) == "[path0]")
+ // 2. third path is not added due to the addition of ", "
+ assert(Utils.buildLocationMetadata(paths, 13) == "[path0, path1]")
}
test("checkHost supports both IPV4 and IPV6") {
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
index c99be98..ccac5a0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/DataSourceScanExecRedactionSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.sql.execution
import java.io.File
import scala.collection.mutable
+import scala.util.Random
import org.apache.hadoop.fs.Path
@@ -122,14 +123,21 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
test("SPARK-31793: FileSourceScanExec metadata should contain limited file paths") {
withTempPath { path =>
val dir = path.getCanonicalPath
+
+ // create a sub-directory with long name so that each root path will always exceed the limit
+ // this is to ensure we always test the case for the path truncation
+ val dataDirName = Random.alphanumeric.take(100).toList.mkString
+ val dataDir = new File(path, dataDirName)
+ dataDir.mkdir()
+
val partitionCol = "partitionCol"
spark.range(10)
.select("id", "id")
.toDF("value", partitionCol)
.write
.partitionBy(partitionCol)
- .orc(dir)
- val paths = (0 to 9).map(i => new File(dir, s"$partitionCol=$i").getCanonicalPath)
+ .orc(dataDir.getCanonicalPath)
+ val paths = (0 to 9).map(i => new File(dataDir, s"$partitionCol=$i").getCanonicalPath)
val plan = spark.read.orc(paths: _*).queryExecution.executedPlan
val location = plan collectFirst {
case f: FileSourceScanExec => f.metadata("Location")
@@ -137,9 +145,17 @@ class DataSourceScanExecRedactionSuite extends DataSourceScanRedactionTest {
assert(location.isDefined)
// The location metadata should at least contain one path
assert(location.get.contains(paths.head))
- // If the temp path length is larger than 100, the metadata length should not exceed
- // twice of the length; otherwise, the metadata length should be controlled within 200.
- assert(location.get.length < Math.max(paths.head.length, 100) * 2)
+
+ // The location metadata should have bracket wrapping paths
+ assert(location.get.indexOf('[') > -1)
+ assert(location.get.indexOf(']') > -1)
+
+ // extract paths in location metadata (removing classname, brackets, separators)
+ val pathsInLocation = location.get.substring(
+ location.get.indexOf('[') + 1, location.get.indexOf(']')).split(", ").toSeq
+
+ // the only one path should be available
+ assert(pathsInLocation.size == 1)
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org