You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2022/04/28 14:33:20 UTC

[drill] branch master updated: DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin (#2524)

This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 00e4ec64c9 DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin (#2524)
00e4ec64c9 is described below

commit 00e4ec64c9ec7cc03bd02c8242b1b12ef62a4ad8
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Thu Apr 28 10:33:13 2022 -0400

    DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin (#2524)
    
    * DRILL-8202: Add Options to Skip Malformed JSON Records to HTTP Plugin
    
    * Fixed typo
---
 contrib/storage-http/README.md                     |  2 +
 .../drill/exec/store/http/HttpJsonOptions.java     | 46 +++++++++++++++++++++-
 .../drill/exec/store/http/TestHttpPlugin.java      | 39 +++++++++++++++++-
 .../src/test/resources/data/malformed.json         |  1 +
 4 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/contrib/storage-http/README.md b/contrib/storage-http/README.md
index 6c19ae630a..7a808cfecc 100644
--- a/contrib/storage-http/README.md
+++ b/contrib/storage-http/README.md
@@ -260,6 +260,8 @@ allows you to configure these options individually per connection and override t
   throw schema change exceptions. In addition to `allTextMode`, you can make Drill less sensitive by setting the `readNumbersAsDouble` to `true` which causes Drill to read all 
   numeric fields in JSON data as `double` data type rather than trying to distinguish between ints and doubles.
 * `enableEscapeAnyChar`:  Allows a user to escape any character with a \
+* `skipMalformedRecords`:  Allows Drill to skip malformed records and recover without throwing exceptions.
+* `skipMalformedDocument`:  Allows Drill to skip entire malformed documents without throwing errors.
 
 All of these can be set by adding the `jsonOptions` to your connection configuration as shown below:
 
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
index 2d9a72aff7..edd77f92e4 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
@@ -45,11 +45,20 @@ public class HttpJsonOptions {
   @JsonProperty
   private final Boolean enableEscapeAnyChar;
 
+  @JsonProperty
+  private final Boolean skipMalformedRecords;
+
+  @JsonProperty
+  private final Boolean skipMalformedDocument;
+
+
   HttpJsonOptions(HttpJsonOptionsBuilder builder) {
     this.allowNanInf = builder.allowNanInf;
     this.allTextMode = builder.allTextMode;
     this.readNumbersAsDouble = builder.readNumbersAsDouble;
     this.enableEscapeAnyChar = builder.enableEscapeAnyChar;
+    this.skipMalformedRecords = builder.skipMalformedRecords;
+    this.skipMalformedDocument = builder.skipMalformedDocument;
   }
 
   public static HttpJsonOptionsBuilder builder() {
@@ -71,6 +80,12 @@ public class HttpJsonOptions {
     if (enableEscapeAnyChar != null) {
       options.enableEscapeAnyChar = enableEscapeAnyChar;
     }
+    if (skipMalformedDocument != null) {
+      options.skipMalformedDocument = skipMalformedDocument;
+    }
+    if (skipMalformedRecords != null) {
+      options.skipMalformedRecords = skipMalformedRecords;
+    }
     return options;
   }
 
@@ -94,6 +109,16 @@ public class HttpJsonOptions {
     return this.enableEscapeAnyChar;
   }
 
+  @JsonProperty("skipMalformedRecords")
+  public Boolean skipMalformedRecords() {
+    return this.skipMalformedRecords;
+  }
+
+  @JsonProperty("skipMalformedDocument")
+  public Boolean skipMalformedDocument() {
+    return this.skipMalformedDocument;
+  }
+
   @Override
   public boolean equals(Object o) {
     if (this == o) {
@@ -106,12 +131,15 @@ public class HttpJsonOptions {
     return Objects.equals(allowNanInf, that.allowNanInf)
       && Objects.equals(allTextMode, that.allTextMode)
       && Objects.equals(readNumbersAsDouble, that.readNumbersAsDouble)
-      && Objects.equals(enableEscapeAnyChar, that.enableEscapeAnyChar);
+      && Objects.equals(enableEscapeAnyChar, that.enableEscapeAnyChar)
+      // Compare both skip flags: hashCode() hashes both, so equals() must too.
+      && Objects.equals(skipMalformedRecords, that.skipMalformedRecords)
+      && Objects.equals(skipMalformedDocument, that.skipMalformedDocument);
   }
 
   @Override
   public int hashCode() {
-    return Objects.hash(allowNanInf, allTextMode, readNumbersAsDouble, enableEscapeAnyChar);
+    return Objects.hash(allowNanInf, allTextMode, readNumbersAsDouble, enableEscapeAnyChar, skipMalformedRecords, skipMalformedDocument);
   }
 
   @Override
@@ -121,6 +147,8 @@ public class HttpJsonOptions {
       .field("allTextMode", allTextMode)
       .field("readNumbersAsDouble", readNumbersAsDouble)
       .field("enableEscapeAnyChar", enableEscapeAnyChar)
+      .field("skipMalformedRecords", skipMalformedRecords)
+      .field("skipMalformedDocument", skipMalformedDocument)
       .toString();
   }
 
@@ -134,6 +162,10 @@ public class HttpJsonOptions {
 
     private Boolean enableEscapeAnyChar;
 
+    private Boolean skipMalformedRecords;
+
+    private Boolean skipMalformedDocument;
+
     public HttpJsonOptionsBuilder allowNanInf(Boolean allowNanInf) {
       this.allowNanInf = allowNanInf;
       return this;
@@ -154,6 +186,16 @@ public class HttpJsonOptions {
       return this;
     }
 
+    public HttpJsonOptionsBuilder skipMalformedRecords(Boolean skipMalformedRecords) {
+      this.skipMalformedRecords = skipMalformedRecords;
+      return this;
+    }
+
+    public HttpJsonOptionsBuilder skipMalformedDocument(Boolean skipMalformedDocument) {
+      this.skipMalformedDocument = skipMalformedDocument;
+      return this;
+    }
+
     public HttpJsonOptions build() {
       return new HttpJsonOptions(this);
     }
diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
index 918d2eded2..79f3ee7939 100644
--- a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
+++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
@@ -74,6 +74,7 @@ public class TestHttpPlugin extends ClusterTest {
   // build machine.
   private static final int MOCK_SERVER_PORT = 44332;
   private static String TEST_JSON_RESPONSE;
+  private static String TEST_MALFORMED_JSON_RESPONSE;
   private static String TEST_CSV_RESPONSE;
   private static String TEST_XML_RESPONSE;
   private static String TEST_JSON_RESPONSE_WITH_DATATYPES;
@@ -87,6 +88,7 @@ public class TestHttpPlugin extends ClusterTest {
     startCluster(ClusterFixture.builder(dirTestWatcher));
 
     TEST_JSON_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.json"), Charsets.UTF_8).read();
+    TEST_MALFORMED_JSON_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/malformed.json"), Charsets.UTF_8).read();
     TEST_CSV_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.csv"), Charsets.UTF_8).read();
     TEST_XML_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.xml"), Charsets.UTF_8).read();
     TEST_JSON_RESPONSE_WITH_DATATYPES = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response2.json"), Charsets.UTF_8).read();
@@ -245,6 +247,17 @@ public class TestHttpPlugin extends ClusterTest {
       .inputType("json")
       .build();
 
+    HttpApiConfig mockJsonWithMalformedData = HttpApiConfig.builder()
+      .url(makeUrl("http://localhost:%d/json"))
+      .method("get")
+      .requireTail(false)
+      .jsonOptions(new HttpJsonOptions.HttpJsonOptionsBuilder()
+        .skipMalformedRecords(true)
+        .build())
+      .inputType("json")
+      .build();
+
+
     HttpApiConfig mockPostConfigWithoutPostBody = HttpApiConfig.builder()
       .url(makeUrl("http://localhost:%d/"))
       .method("POST")
@@ -339,6 +352,7 @@ public class TestHttpPlugin extends ClusterTest {
     configs.put("github2", mockGithubWithDuplicateParam);
     configs.put("github3", mockGithubWithParamInQuery);
     configs.put("mockJsonAllText", mockTableWithJsonOptions);
+    configs.put("malformedJson", mockJsonWithMalformedData);
 
     HttpStoragePluginConfig mockStorageConfigWithWorkspace =
         new HttpStoragePluginConfig(false, configs, 2, "globaluser", "globalpass", "",
@@ -537,7 +551,30 @@ public class TestHttpPlugin extends ClusterTest {
 
       RowSetUtilities.verify(expected, results);
     } catch (Exception e) {
-      System.out.println(e.getMessage());
+      fail();
+    }
+  }
+
+  @Test
+  public void simpleTestWithMalformedJson() {
+    String sql = "SELECT * FROM local.malformedJson";
+
+    try (MockWebServer server = startServer()) {
+      server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_MALFORMED_JSON_RESPONSE));
+      RowSet results = client.queryBuilder().sql(sql).rowSet();
+
+      TupleMetadata expectedSchema = new SchemaBuilder()
+        .addNullable("a", MinorType.BIGINT)
+        .build();
+
+      RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+        .addRow(1)
+        .addRow(5)
+        .addRow(6)
+        .build();
+
+      RowSetUtilities.verify(expected, results);
+    } catch (Exception e) {
       fail();
     }
   }
diff --git a/contrib/storage-http/src/test/resources/data/malformed.json b/contrib/storage-http/src/test/resources/data/malformed.json
new file mode 100644
index 0000000000..58a045d22b
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/malformed.json
@@ -0,0 +1 @@
+{a: 1} {a: {a: 3} {a: 4} {a: 5} {a: 6}