You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2022/04/28 14:33:20 UTC
[drill] branch master updated: DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin (#2524)
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 00e4ec64c9 DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin (#2524)
00e4ec64c9 is described below
commit 00e4ec64c9ec7cc03bd02c8242b1b12ef62a4ad8
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Thu Apr 28 10:33:13 2022 -0400
DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin (#2524)
* DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin
* Fixed typo
---
contrib/storage-http/README.md | 2 +
.../drill/exec/store/http/HttpJsonOptions.java | 46 +++++++++++++++++++++-
.../drill/exec/store/http/TestHttpPlugin.java | 39 +++++++++++++++++-
.../src/test/resources/data/malformed.json | 1 +
4 files changed, 85 insertions(+), 3 deletions(-)
diff --git a/contrib/storage-http/README.md b/contrib/storage-http/README.md
index 6c19ae630a..7a808cfecc 100644
--- a/contrib/storage-http/README.md
+++ b/contrib/storage-http/README.md
@@ -260,6 +260,8 @@ allows you to configure these options individually per connection and override t
throw schema change exceptions. In addition to `allTextMode`, you can make Drill less sensitive by setting the `readNumbersAsDouble` to `true` which causes Drill to read all
numeric fields in JSON data as `double` data type rather than trying to distinguish between ints and doubles.
* `enableEscapeAnyChar`: Allows a user to escape any character with a \
+* `skipMalformedRecords`: Allows Drill to skip malformed records and recover without throwing exceptions.
+* `skipMalformedDocument`: Allows Drill to skip entire malformed documents without throwing errors.
All of these can be set by adding the `jsonOptions` to your connection configuration as shown below:
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
index 2d9a72aff7..edd77f92e4 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
@@ -45,11 +45,20 @@ public class HttpJsonOptions {
@JsonProperty
private final Boolean enableEscapeAnyChar;
+ @JsonProperty
+ private final Boolean skipMalformedRecords;
+
+ @JsonProperty
+ private final Boolean skipMalformedDocument;
+
+
HttpJsonOptions(HttpJsonOptionsBuilder builder) {
this.allowNanInf = builder.allowNanInf;
this.allTextMode = builder.allTextMode;
this.readNumbersAsDouble = builder.readNumbersAsDouble;
this.enableEscapeAnyChar = builder.enableEscapeAnyChar;
+ this.skipMalformedRecords = builder.skipMalformedRecords;
+ this.skipMalformedDocument = builder.skipMalformedDocument;
}
public static HttpJsonOptionsBuilder builder() {
@@ -71,6 +80,12 @@ public class HttpJsonOptions {
if (enableEscapeAnyChar != null) {
options.enableEscapeAnyChar = enableEscapeAnyChar;
}
+ if (skipMalformedDocument != null) {
+ options.skipMalformedDocument = skipMalformedDocument;
+ }
+ if (skipMalformedRecords != null) {
+ options.skipMalformedRecords = skipMalformedRecords;
+ }
return options;
}
@@ -94,6 +109,16 @@ public class HttpJsonOptions {
return this.enableEscapeAnyChar;
}
+ @JsonProperty("skipMalformedRecords")
+ public Boolean skipMalformedRecords() {
+ return this.skipMalformedRecords;
+ }
+
+ @JsonProperty("skipMalformedDocument")
+ public Boolean skipMalformedDocument() {
+ return this.skipMalformedDocument;
+ }
+
@Override
public boolean equals(Object o) {
if (this == o) {
@@ -106,12 +131,15 @@ public class HttpJsonOptions {
return Objects.equals(allowNanInf, that.allowNanInf)
&& Objects.equals(allTextMode, that.allTextMode)
&& Objects.equals(readNumbersAsDouble, that.readNumbersAsDouble)
- && Objects.equals(enableEscapeAnyChar, that.enableEscapeAnyChar);
+ && Objects.equals(enableEscapeAnyChar, that.enableEscapeAnyChar)
+ && Objects.equals(skipMalformedRecords, that.skipMalformedRecords)
+ // Compared as well so equals() stays consistent with hashCode(), which hashes this field.
+ && Objects.equals(skipMalformedDocument, that.skipMalformedDocument);
}
@Override
public int hashCode() {
- return Objects.hash(allowNanInf, allTextMode, readNumbersAsDouble, enableEscapeAnyChar);
+ return Objects.hash(allowNanInf, allTextMode, readNumbersAsDouble, enableEscapeAnyChar, skipMalformedRecords, skipMalformedDocument);
}
@Override
@@ -121,6 +147,8 @@ public class HttpJsonOptions {
.field("allTextMode", allTextMode)
.field("readNumbersAsDouble", readNumbersAsDouble)
.field("enableEscapeAnyChar", enableEscapeAnyChar)
+ .field("skipMalformedRecords", skipMalformedRecords)
+ .field("skipMalformedDocument", skipMalformedDocument)
.toString();
}
@@ -134,6 +162,10 @@ public class HttpJsonOptions {
private Boolean enableEscapeAnyChar;
+ private Boolean skipMalformedRecords;
+
+ private Boolean skipMalformedDocument;
+
public HttpJsonOptionsBuilder allowNanInf(Boolean allowNanInf) {
this.allowNanInf = allowNanInf;
return this;
@@ -154,6 +186,16 @@ public class HttpJsonOptions {
return this;
}
+ public HttpJsonOptionsBuilder skipMalformedRecords(Boolean skipMalformedRecords) {
+ this.skipMalformedRecords = skipMalformedRecords;
+ return this;
+ }
+
+ public HttpJsonOptionsBuilder skipMalformedDocument(Boolean skipMalformedDocument) {
+ this.skipMalformedDocument = skipMalformedDocument;
+ return this;
+ }
+
public HttpJsonOptions build() {
return new HttpJsonOptions(this);
}
diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
index 918d2eded2..79f3ee7939 100644
--- a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
+++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
@@ -74,6 +74,7 @@ public class TestHttpPlugin extends ClusterTest {
// build machine.
private static final int MOCK_SERVER_PORT = 44332;
private static String TEST_JSON_RESPONSE;
+ private static String TEST_MALFORMED_JSON_RESPONSE;
private static String TEST_CSV_RESPONSE;
private static String TEST_XML_RESPONSE;
private static String TEST_JSON_RESPONSE_WITH_DATATYPES;
@@ -87,6 +88,7 @@ public class TestHttpPlugin extends ClusterTest {
startCluster(ClusterFixture.builder(dirTestWatcher));
TEST_JSON_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.json"), Charsets.UTF_8).read();
+ TEST_MALFORMED_JSON_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/malformed.json"), Charsets.UTF_8).read();
TEST_CSV_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.csv"), Charsets.UTF_8).read();
TEST_XML_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.xml"), Charsets.UTF_8).read();
TEST_JSON_RESPONSE_WITH_DATATYPES = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response2.json"), Charsets.UTF_8).read();
@@ -245,6 +247,17 @@ public class TestHttpPlugin extends ClusterTest {
.inputType("json")
.build();
+ HttpApiConfig mockJsonWithMalformedData = HttpApiConfig.builder()
+ .url(makeUrl("http://localhost:%d/json"))
+ .method("get")
+ .requireTail(false)
+ .jsonOptions(new HttpJsonOptions.HttpJsonOptionsBuilder()
+ .skipMalformedRecords(true)
+ .build())
+ .inputType("json")
+ .build();
+
+
HttpApiConfig mockPostConfigWithoutPostBody = HttpApiConfig.builder()
.url(makeUrl("http://localhost:%d/"))
.method("POST")
@@ -339,6 +352,7 @@ public class TestHttpPlugin extends ClusterTest {
configs.put("github2", mockGithubWithDuplicateParam);
configs.put("github3", mockGithubWithParamInQuery);
configs.put("mockJsonAllText", mockTableWithJsonOptions);
+ configs.put("malformedJson", mockJsonWithMalformedData);
HttpStoragePluginConfig mockStorageConfigWithWorkspace =
new HttpStoragePluginConfig(false, configs, 2, "globaluser", "globalpass", "",
@@ -537,7 +551,30 @@ public class TestHttpPlugin extends ClusterTest {
RowSetUtilities.verify(expected, results);
} catch (Exception e) {
- System.out.println(e.getMessage());
+ fail();
+ }
+ }
+
+ @Test
+ public void simpleTestWithMalformedJson() {
+ String sql = "SELECT * FROM local.malformedJson";
+
+ try (MockWebServer server = startServer()) {
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_MALFORMED_JSON_RESPONSE));
+ RowSet results = client.queryBuilder().sql(sql).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("a", MinorType.BIGINT)
+ .build();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(1)
+ .addRow(5)
+ .addRow(6)
+ .build();
+
+ RowSetUtilities.verify(expected, results);
+ } catch (Exception e) {
fail();
}
}
diff --git a/contrib/storage-http/src/test/resources/data/malformed.json b/contrib/storage-http/src/test/resources/data/malformed.json
new file mode 100644
index 0000000000..58a045d22b
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/malformed.json
@@ -0,0 +1 @@
+{a: 1} {a: {a: 3} {a: 4} {a: 5} {a: 6}