You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by pw...@apache.org on 2014/06/05 00:56:37 UTC
git commit: SPARK-1518: FileLogger: Fix compile against Hadoop trunk
Repository: spark
Updated Branches:
refs/heads/master 189df165b -> 1765c8d0d
SPARK-1518: FileLogger: Fix compile against Hadoop trunk
In Hadoop trunk (currently Hadoop 3.0.0), the deprecated
FSDataOutputStream#sync() method has been removed. Instead, we should
call FSDataOutputStream#hflush, which does what the deprecated
method used to do.
Author: Colin McCabe <cm...@cloudera.com>
Closes #898 from cmccabe/SPARK-1518 and squashes the following commits:
752b9d7 [Colin McCabe] FileLogger: Fix compile against Hadoop trunk
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1765c8d0
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1765c8d0
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1765c8d0
Branch: refs/heads/master
Commit: 1765c8d0ddf6bb5bc3c21f994456eba04c581de4
Parents: 189df16
Author: Colin McCabe <cm...@cloudera.com>
Authored: Wed Jun 4 15:56:29 2014 -0700
Committer: Patrick Wendell <pw...@gmail.com>
Committed: Wed Jun 4 15:56:29 2014 -0700
----------------------------------------------------------------------
.../scala/org/apache/spark/util/FileLogger.scala | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/1765c8d0/core/src/main/scala/org/apache/spark/util/FileLogger.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/util/FileLogger.scala b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
index 0e6d21b..6a95dc0 100644
--- a/core/src/main/scala/org/apache/spark/util/FileLogger.scala
+++ b/core/src/main/scala/org/apache/spark/util/FileLogger.scala
@@ -61,6 +61,14 @@ private[spark] class FileLogger(
// Only defined if the file system scheme is not local
private var hadoopDataStream: Option[FSDataOutputStream] = None
+ // The Hadoop APIs have changed over time, so we use reflection to figure out
+ // the correct method to use to flush a hadoop data stream. See SPARK-1518
+ // for details.
+ private val hadoopFlushMethod = {
+ val cls = classOf[FSDataOutputStream]
+ scala.util.Try(cls.getMethod("hflush")).getOrElse(cls.getMethod("sync"))
+ }
+
private var writer: Option[PrintWriter] = None
/**
@@ -149,13 +157,13 @@ private[spark] class FileLogger(
/**
* Flush the writer to disk manually.
*
- * If the Hadoop FileSystem is used, the underlying FSDataOutputStream (r1.0.4) must be
- * sync()'ed manually as it does not support flush(), which is invoked by when higher
- * level streams are flushed.
+ * When using a Hadoop filesystem, we need to invoke the hflush or sync
+ * method. In HDFS, hflush guarantees that the data gets to all the
+ * DataNodes.
*/
def flush() {
writer.foreach(_.flush())
- hadoopDataStream.foreach(_.sync())
+ hadoopDataStream.foreach(hadoopFlushMethod.invoke(_))
}
/**