You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by gu...@apache.org on 2022/03/24 03:17:09 UTC
[spark] branch branch-3.2 updated: [SPARK-38631][CORE] Uses Java-based implementation for un-tarring at Utils.unpack
This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new 271b338 [SPARK-38631][CORE] Uses Java-based implementation for un-tarring at Utils.unpack
271b338 is described below
commit 271b338324e88b0bfc63364937a56305f7b350fc
Author: Hyukjin Kwon <gu...@apache.org>
AuthorDate: Thu Mar 24 12:12:38 2022 +0900
[SPARK-38631][CORE] Uses Java-based implementation for un-tarring at Utils.unpack
### What changes were proposed in this pull request?
This PR proposes to use `FileUtil.unTarUsingJava` that is a Java implementation for un-tar `.tar` files. `unTarUsingJava` is not public but it exists in all Hadoop versions from 2.1+, see HADOOP-9264.
The security issue reproduction requires a non-Windows platform, and a non-gzipped TAR archive file name (contents don't matter).
### Why are the changes needed?
There is a risk for arbitrary shell command injection via `Utils.unpack` when the filename is controlled by a malicious user. This is due to an issue in Hadoop's `unTar`, that is not properly escaping the filename before passing to a shell command:https://github.com/apache/hadoop/blob/trunk/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java#L904
### Does this PR introduce _any_ user-facing change?
Yes, it prevents a security issue that, previously, allowed users to execute arbitrary shall command.
### How was this patch tested?
Manually tested in local, and existing test cases should cover.
Closes #35946 from HyukjinKwon/SPARK-38631.
Authored-by: Hyukjin Kwon <gu...@apache.org>
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
(cherry picked from commit 057c051285ec32c665fb458d0670c1c16ba536b2)
Signed-off-by: Hyukjin Kwon <gu...@apache.org>
---
.../main/scala/org/apache/spark/util/Utils.scala | 30 ++++++++++++++++++++--
1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 776c8c4..956f1c6f 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -592,16 +592,42 @@ private[spark] object Utils extends Logging {
if (lowerSrc.endsWith(".jar")) {
RunJar.unJar(source, dest, RunJar.MATCH_ANY)
} else if (lowerSrc.endsWith(".zip")) {
+ // TODO(SPARK-37677): should keep file permissions. Java implementation doesn't.
FileUtil.unZip(source, dest)
- } else if (
- lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz") || lowerSrc.endsWith(".tar")) {
+ } else if (lowerSrc.endsWith(".tar.gz") || lowerSrc.endsWith(".tgz")) {
FileUtil.unTar(source, dest)
+ } else if (lowerSrc.endsWith(".tar")) {
+ // TODO(SPARK-38632): should keep file permissions. Java implementation doesn't.
+ unTarUsingJava(source, dest)
} else {
logWarning(s"Cannot unpack $source, just copying it to $dest.")
copyRecursive(source, dest)
}
}
+ /**
+ * The method below was copied from `FileUtil.unTar` but uses Java-based implementation
+ * to work around a security issue, see also SPARK-38631.
+ */
+ private def unTarUsingJava(source: File, dest: File): Unit = {
+ if (!dest.mkdirs && !dest.isDirectory) {
+ throw new IOException(s"Mkdirs failed to create $dest")
+ } else {
+ try {
+ // Should not fail because all Hadoop 2.1+ (from HADOOP-9264)
+ // have 'unTarUsingJava'.
+ val mth = classOf[FileUtil].getDeclaredMethod(
+ "unTarUsingJava", classOf[File], classOf[File], classOf[Boolean])
+ mth.setAccessible(true)
+ mth.invoke(null, source, dest, java.lang.Boolean.FALSE)
+ } catch {
+ // Re-throw the original exception.
+ case e: java.lang.reflect.InvocationTargetException if e.getCause != null =>
+ throw e.getCause
+ }
+ }
+ }
+
/** Records the duration of running `body`. */
def timeTakenMs[T](body: => T): (T, Long) = {
val startTime = System.nanoTime()
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org