You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by va...@apache.org on 2016/09/29 22:40:41 UTC

spark git commit: [SPARK-17676][CORE] FsHistoryProvider should ignore hidden files

Repository: spark
Updated Branches:
  refs/heads/master 29396e7d1 -> 3993ebca2


[SPARK-17676][CORE] FsHistoryProvider should ignore hidden files

## What changes were proposed in this pull request?

FsHistoryProvider writes a hidden file (to check the filesystem's clock).
Even though it deletes the file immediately, another thread would
sometimes scan the files on the filesystem in between, and an error
message would then be logged that was very misleading for the end user.
(The logged error was harmless, though.) This change makes the scan skip
files whose names start with ".", so the error is no longer logged.

## How was this patch tested?

I added one unit test, but to be clear, that test already passed before this change. The only actual change in behavior in that test is the logging (after the change, no error is logged anymore), which I verified manually.

Author: Imran Rashid <ir...@cloudera.com>

Closes #15250 from squito/SPARK-17676.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3993ebca
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3993ebca
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3993ebca

Branch: refs/heads/master
Commit: 3993ebca23afa4b8770695051635933a6c9d2c11
Parents: 29396e7
Author: Imran Rashid <ir...@cloudera.com>
Authored: Thu Sep 29 15:40:35 2016 -0700
Committer: Marcelo Vanzin <va...@cloudera.com>
Committed: Thu Sep 29 15:40:35 2016 -0700

----------------------------------------------------------------------
 .../deploy/history/FsHistoryProvider.scala      |  7 +++-
 .../deploy/history/FsHistoryProviderSuite.scala | 36 ++++++++++++++++++--
 2 files changed, 40 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/3993ebca/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index d494ff0..c5740e4 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -294,7 +294,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
         .filter { entry =>
           try {
             val prevFileSize = fileToAppInfo.get(entry.getPath()).map{_.fileSize}.getOrElse(0L)
-            !entry.isDirectory() && prevFileSize < entry.getLen()
+            !entry.isDirectory() &&
+              // FsHistoryProvider generates a hidden file which can't be read.  Accidentally
+              // reading a garbage file is safe, but we would log an error which can be scary to
+              // the end-user.
+              !entry.getPath().getName().startsWith(".") &&
+              prevFileSize < entry.getLen()
           } catch {
             case e: AccessControlException =>
               // Do not use "logInfo" since these messages can get pretty noisy if printed on

http://git-wip-us.apache.org/repos/asf/spark/blob/3993ebca/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
index 39c5857..01bef0a 100644
--- a/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
+++ b/core/src/test/scala/org/apache/spark/deploy/history/FsHistoryProviderSuite.scala
@@ -17,8 +17,7 @@
 
 package org.apache.spark.deploy.history
 
-import java.io.{BufferedOutputStream, ByteArrayInputStream, ByteArrayOutputStream, File,
-  FileOutputStream, OutputStreamWriter}
+import java.io._
 import java.net.URI
 import java.nio.charset.StandardCharsets
 import java.util.concurrent.TimeUnit
@@ -394,6 +393,39 @@ class FsHistoryProviderSuite extends SparkFunSuite with BeforeAndAfter with Matc
     }
   }
 
+  test("ignore hidden files") {
+
+    // FsHistoryProvider should ignore hidden files.  (It even writes out a hidden file itself
+    // that should be ignored).
+
+    // write out one totally bogus hidden file
+    val hiddenGarbageFile = new File(testDir, ".garbage")
+    val out = new PrintWriter(hiddenGarbageFile)
+    // scalastyle:off println
+    out.println("GARBAGE")
+    // scalastyle:on println
+    out.close()
+
+    // also write out one real event log file, but since it's a hidden file, we shouldn't read it
+    val tmpNewAppFile = newLogFile("hidden", None, inProgress = false)
+    val hiddenNewAppFile = new File(tmpNewAppFile.getParentFile, "." + tmpNewAppFile.getName)
+    tmpNewAppFile.renameTo(hiddenNewAppFile)
+
+    // and write one real file, which should still get picked up just fine
+    val newAppComplete = newLogFile("real-app", None, inProgress = false)
+    writeFile(newAppComplete, true, None,
+      SparkListenerApplicationStart(newAppComplete.getName(), Some("new-app-complete"), 1L, "test",
+        None),
+      SparkListenerApplicationEnd(5L)
+    )
+
+    val provider = new FsHistoryProvider(createTestConf())
+    updateAndCheck(provider) { list =>
+      list.size should be (1)
+      list(0).name should be ("real-app")
+    }
+  }
+
   /**
    * Asks the provider to check for logs and calls a function to perform checks on the updated
    * app list. Example:


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org