You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by mt...@apache.org on 2021/10/19 10:10:23 UTC
[hadoop] branch branch-3.3 updated: HADOOP-17945. JsonSerialization
raises EOFException reading JSON data stored on google GCS (#3501)
This is an automated email from the ASF dual-hosted git repository.
mthakur pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/hadoop.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new b8f3e54 HADOOP-17945. JsonSerialization raises EOFException reading JSON data stored on google GCS (#3501)
b8f3e54 is described below
commit b8f3e54ff79d530c262e62866043c9737e8de2a5
Author: Steve Loughran <st...@cloudera.com>
AuthorDate: Tue Oct 19 11:03:37 2021 +0100
HADOOP-17945. JsonSerialization raises EOFException reading JSON data stored on google GCS (#3501)
Contributed By: Steve Loughran
---
.../java/org/apache/hadoop/fs/PathIOException.java | 8 +++-
.../org/apache/hadoop/util/JsonSerialization.java | 54 +++++++++++++++-------
.../apache/hadoop/util/TestJsonSerialization.java | 38 ++++++++++++++-
3 files changed, 80 insertions(+), 20 deletions(-)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
index deb3880..f32f2a9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
@@ -64,7 +64,13 @@ public class PathIOException extends IOException {
this.path = path;
}
- protected PathIOException(String path, String error, Throwable cause) {
+ /**
+ * Use a subclass of PathIOException if possible.
+ * @param path for the exception
+ * @param error custom string to use an the error text
+ * @param cause cause of exception.
+ */
+ public PathIOException(String path, String error, Throwable cause) {
super(error, cause);
this.path = path;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
index 7a26eca..a80f958 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/JsonSerialization.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.util;
+import javax.annotation.Nullable;
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
@@ -43,8 +44,13 @@ import org.slf4j.LoggerFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FutureDataInputStreamBuilder;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
+
+import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;
/**
* Support for marshalling objects to and from JSON.
@@ -229,30 +235,44 @@ public class JsonSerialization<T> {
/**
* Load from a Hadoop filesystem.
- * There's a check for data availability after the file is open, by
- * raising an EOFException if stream.available == 0.
- * This allows for a meaningful exception without the round trip overhead
- * of a getFileStatus call before opening the file. It may be brittle
- * against an FS stream which doesn't return a value here, but the
- * standard filesystems all do.
- * JSON parsing and mapping problems
- * are converted to IOEs.
* @param fs filesystem
* @param path path
* @return a loaded object
- * @throws IOException IO or JSON parse problems
+ * @throws PathIOException JSON parse problem
+ * @throws IOException IO problems
*/
public T load(FileSystem fs, Path path) throws IOException {
- try (FSDataInputStream dataInputStream = fs.open(path)) {
- // throw an EOF exception if there is no data available.
- if (dataInputStream.available() == 0) {
- throw new EOFException("No data in " + path);
- }
+ return load(fs, path, null);
+ }
+
+ /**
+ * Load from a Hadoop filesystem.
+ * If a file status is supplied, it's passed in to the openFile()
+ * call so that FS implementations can optimize their opening.
+ * @param fs filesystem
+ * @param path path
+ * @param status status of the file to open.
+ * @return a loaded object
+ * @throws PathIOException JSON parse problem
+ * @throws EOFException file status references an empty file
+ * @throws IOException IO problems
+ */
+ public T load(FileSystem fs, Path path, @Nullable FileStatus status)
+ throws IOException {
+
+ if (status != null && status.getLen() == 0) {
+ throw new EOFException("No data in " + path);
+ }
+ FutureDataInputStreamBuilder builder = fs.openFile(path);
+ if (status != null) {
+ builder.withFileStatus(status);
+ }
+ try (FSDataInputStream dataInputStream =
+ awaitFuture(builder.build())) {
return fromJsonStream(dataInputStream);
} catch (JsonProcessingException e) {
- throw new IOException(
- String.format("Failed to read JSON file \"%s\": %s", path, e),
- e);
+ throw new PathIOException(path.toString(),
+ "Failed to read JSON file " + e, e);
}
}
diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java
index 991697d..4a106e8 100644
--- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java
+++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/util/TestJsonSerialization.java
@@ -28,9 +28,11 @@ import com.fasterxml.jackson.core.JsonParseException;
import org.junit.Test;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathIOException;
import org.apache.hadoop.test.HadoopTestBase;
import org.apache.hadoop.test.LambdaTestUtils;
@@ -151,6 +153,9 @@ public class TestJsonSerialization extends HadoopTestBase {
}
}
+ /**
+ * round trip through both load APIs.
+ */
@Test
public void testFileSystemRoundTrip() throws Throwable {
File tempFile = File.createTempFile("Keyval", ".json");
@@ -159,19 +164,30 @@ public class TestJsonSerialization extends HadoopTestBase {
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
try {
serDeser.save(fs, tempPath, source, false);
- assertEquals(source, serDeser.load(fs, tempPath));
+ assertEquals("JSON loaded with load(fs, path)",
+ source,
+ serDeser.load(fs, tempPath));
+ assertEquals("JSON loaded with load(fs, path, status)",
+ source,
+ serDeser.load(fs, tempPath, fs.getFileStatus(tempPath)));
} finally {
fs.delete(tempPath, false);
}
}
+ /**
+ * 0 byte file through the load(path) API will fail with a wrapped
+ * Parser exception.
+ * 0 byte file through the load(path, status) API will fail with a wrapped
+ * Parser exception.
+ */
@Test
public void testFileSystemEmptyPath() throws Throwable {
File tempFile = File.createTempFile("Keyval", ".json");
Path tempPath = new Path(tempFile.toURI());
LocalFileSystem fs = FileSystem.getLocal(new Configuration());
try {
- LambdaTestUtils.intercept(EOFException.class,
+ LambdaTestUtils.intercept(PathIOException.class,
() -> serDeser.load(fs, tempPath));
fs.delete(tempPath, false);
LambdaTestUtils.intercept(FileNotFoundException.class,
@@ -181,5 +197,23 @@ public class TestJsonSerialization extends HadoopTestBase {
}
}
+ /**
+ * 0 byte file through the load(path, status) API will fail with an
+ * EOFException.
+ */
+ @Test
+ public void testFileSystemEmptyStatus() throws Throwable {
+ File tempFile = File.createTempFile("Keyval", ".json");
+ Path tempPath = new Path(tempFile.toURI());
+ LocalFileSystem fs = FileSystem.getLocal(new Configuration());
+ try {
+ final FileStatus st = fs.getFileStatus(tempPath);
+ LambdaTestUtils.intercept(EOFException.class,
+ () -> serDeser.load(fs, tempPath, st));
+ } finally {
+ fs.delete(tempPath, false);
+ }
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org
For additional commands, e-mail: common-commits-help@hadoop.apache.org