You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by zs...@apache.org on 2016/12/07 18:30:08 UTC
spark git commit: [SPARK-18764][CORE] Add a warning log when skipping
a corrupted file
Repository: spark
Updated Branches:
refs/heads/master f1fca81b1 -> dbf3e298a
[SPARK-18764][CORE] Add a warning log when skipping a corrupted file
## What changes were proposed in this pull request?
It's better to log a warning when skipping a corrupted file. This is helpful when we want to let the job finish first, then find the corrupted files in the log and fix them.
## How was this patch tested?
Jenkins
Author: Shixiong Zhu <sh...@databricks.com>
Closes #16192 from zsxwing/SPARK-18764.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dbf3e298
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dbf3e298
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dbf3e298
Branch: refs/heads/master
Commit: dbf3e298a1a35c0243f087814ddf88034ff96d66
Parents: f1fca81
Author: Shixiong Zhu <sh...@databricks.com>
Authored: Wed Dec 7 10:30:05 2016 -0800
Committer: Shixiong Zhu <sh...@databricks.com>
Committed: Wed Dec 7 10:30:05 2016 -0800
----------------------------------------------------------------------
core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala | 4 +++-
core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala | 6 +++++-
.../apache/spark/sql/execution/datasources/FileScanRDD.scala | 1 +
3 files changed, 9 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/dbf3e298/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index e3d81a6..6e87233 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -259,7 +259,9 @@ class HadoopRDD[K, V](
try {
finished = !reader.next(key, value)
} catch {
- case e: IOException if ignoreCorruptFiles => finished = true
+ case e: IOException if ignoreCorruptFiles =>
+ logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e)
+ finished = true
}
if (!finished) {
inputMetrics.incRecordsRead(1)
http://git-wip-us.apache.org/repos/asf/spark/blob/dbf3e298/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
index e90e84c..e805192 100644
--- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala
@@ -189,7 +189,11 @@ class NewHadoopRDD[K, V](
try {
finished = !reader.nextKeyValue
} catch {
- case e: IOException if ignoreCorruptFiles => finished = true
+ case e: IOException if ignoreCorruptFiles =>
+ logWarning(
+ s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}",
+ e)
+ finished = true
}
if (finished) {
// Close and release the reader here; close() will also be called when the task
http://git-wip-us.apache.org/repos/asf/spark/blob/dbf3e298/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala
index 306dc65..6d8cd81 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/FileScanRDD.scala
@@ -139,6 +139,7 @@ class FileScanRDD(
}
} catch {
case e: IOException =>
+ logWarning(s"Skipped the rest content in the corrupted file: $currentFile", e)
finished = true
null
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org