You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by mt...@apache.org on 2021/10/19 10:10:23 UTC

[hadoop] branch branch-3.3 updated: HADOOP-17945. JsonSerialization raises EOFException reading JSON data stored on google GCS (#3501)

This is an automated email from the ASF dual-hosted git repository.

mthakur pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new b8f3e54  HADOOP-17945. JsonSerialization raises EOFException reading JSON data stored on google GCS (#3501)
b8f3e54 is described below

commit b8f3e54ff79d530c262e62866043c9737e8de2a5
Author: Steve Loughran <st...@cloudera.com>
AuthorDate: Tue Oct 19 11:03:37 2021 +0100

    HADOOP-17945. JsonSerialization raises EOFException reading JSON data stored on google GCS (#3501)
    
    
    Contributed By: Steve Loughran
---
 .../java/org/apache/hadoop/fs/PathIOException.java |  8 +++-
 .../org/apache/hadoop/util/JsonSerialization.java  | 54 +++++++++++++++-------
 .../apache/hadoop/util/TestJsonSerialization.java  | 38 ++++++++++++++-
 3 files changed, 80 insertions(+), 20 deletions(-)

diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
index deb3880..f32f2a9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
@@ -64,7 +64,13 @@ public class PathIOException extends IOException {
     this.path = path;
   }
 
-  protected PathIOException(String path, String error, Throwable cause) {
+  /**
+   * Use a subclass of PathIOException if possible.
+   * @param path for the exception
+   * @param error custom string to use an the error text
+   * @param cause cause of exception.
+   */
+  public PathIOException(String path, String error, Throwable cause) {
     super(error, cause);
     this.path = path;
   }
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
index 7a26eca..a80f958 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.util;
 
+import javax.annotation.Nullable;
 import java.io.EOFException;
 import java.io.File;
 import java.io.FileNotFoundException;
@@ -43,8 +44,13 @@ import org.slf4j.LoggerFactory;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FutureDataInputStreamBuilder;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
+
+import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;
 
 /**
  * Support for marshalling objects to and from JSON.
@@ -229,30 +235,44 @@ public class JsonSerialization<T> {
 
   /**
    * Load from a Hadoop filesystem.
-   * There's a check for data availability after the file is open, by
-   * raising an EOFException if stream.available == 0.
-   * This allows for a meaningful exception without the round trip overhead
-   * of a getFileStatus call before opening the file. It may be brittle
-   * against an FS stream which doesn't return a value here, but the
-   * standard filesystems all do.
-   * JSON parsing and mapping problems
-   * are converted to IOEs.
    * @param fs filesystem
    * @param path path
    * @return a loaded object
-   * @throws IOException IO or JSON parse problems
+   * @throws PathIOException JSON parse problem
+   * @throws IOException IO problems
    */
   public T load(FileSystem fs, Path path) throws IOException {
-    try (FSDataInputStream dataInputStream = fs.open(path)) {
-      // throw an EOF exception if there is no data available.
-      if (dataInputStream.available() == 0) {
-        throw new EOFException("No data in " + path);
-      }
+    return load(fs, path, null);
+  }
+
+  /**
+   * Load from a Hadoop filesystem.
+   * If a file status is supplied, it's passed in to the openFile()
+   * call so that FS implementations can optimize their opening.
+   * @param fs filesystem
+   * @param path path
+   * @param status status of the file to open.
+   * @return a loaded object
+   * @throws PathIOException JSON parse problem
+   * @throws EOFException file status references an empty file
+   * @throws IOException IO problems
+   */
+  public T load(FileSystem fs, Path path, @Nullable FileStatus status)
+      throws IOException {
+
+    if (status != null && status.getLen() == 0) {
+      throw new EOFException("No data in " + path);
+    }
+    FutureDataInputStreamBuilder builder = fs.openFile(path);
+    if (status != null) {
+      builder.withFileStatus(status);
+    }
+    try (FSDataInputStream dataInputStream =
+             awaitFuture(builder.build())) {
       return fromJsonStream(dataInputStream);
     } catch (JsonProcessingException e) {
-      throw new IOException(
-          String.format("Failed to read JSON file \"%s\": %s", path, e),
-          e);
+      throw new PathIOException(path.toString(),
+          "Failed to read JSON file " + e, e);
     }
   }
 
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java
index 991697d..4a106e8 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java
@@ -28,9 +28,11 @@ import com.fasterxml.jackson.core.JsonParseException;
 import org.junit.Test;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.LocalFileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
 import org.apache.hadoop.test.HadoopTestBase;
 import org.apache.hadoop.test.LambdaTestUtils;
 
@@ -151,6 +153,9 @@ public class TestJsonSerialization extends HadoopTestBase {
     }
   }
 
+  /**
+   * round trip through both load APIs.
+   */
   @Test
   public void testFileSystemRoundTrip() throws Throwable {
     File tempFile = File.createTempFile("Keyval", ".json");
@@ -159,19 +164,30 @@ public class TestJsonSerialization extends HadoopTestBase {
     LocalFileSystem fs = FileSystem.getLocal(new Configuration());
     try {
       serDeser.save(fs, tempPath, source, false);
-      assertEquals(source, serDeser.load(fs, tempPath));
+      assertEquals("JSON loaded with load(fs, path)",
+          source,
+          serDeser.load(fs, tempPath));
+      assertEquals("JSON loaded with load(fs, path, status)",
+          source,
+          serDeser.load(fs, tempPath, fs.getFileStatus(tempPath)));
     } finally {
       fs.delete(tempPath, false);
     }
   }
 
+  /**
+   * 0 byte file through the load(path) API will fail with a wrapped
+   * Parser exception.
+   * 0 byte file through the load(path, status) API will fail with a wrapped
+   * Parser exception.
+   */
   @Test
   public void testFileSystemEmptyPath() throws Throwable {
     File tempFile = File.createTempFile("Keyval", ".json");
     Path tempPath = new Path(tempFile.toURI());
     LocalFileSystem fs = FileSystem.getLocal(new Configuration());
     try {
-      LambdaTestUtils.intercept(EOFException.class,
+      LambdaTestUtils.intercept(PathIOException.class,
           () -> serDeser.load(fs, tempPath));
       fs.delete(tempPath, false);
       LambdaTestUtils.intercept(FileNotFoundException.class,
@@ -181,5 +197,23 @@ public class TestJsonSerialization extends HadoopTestBase {
     }
   }
 
+  /**
+   * 0 byte file through the load(path, status) API will fail with an
+   * EOFException.
+   */
+  @Test
+  public void testFileSystemEmptyStatus() throws Throwable {
+    File tempFile = File.createTempFile("Keyval", ".json");
+    Path tempPath = new Path(tempFile.toURI());
+    LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+    try {
+      final FileStatus st = fs.getFileStatus(tempPath);
+      LambdaTestUtils.intercept(EOFException.class,
+          () -> serDeser.load(fs, tempPath, st));
+    } finally {
+      fs.delete(tempPath, false);
+    }
+  }
+
 
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org