You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2021/11/03 12:09:13 UTC
[drill] branch master updated: DRILL-8020: Add JSON Configuration
Options to HTTP Rest Plugin (#2348)
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 9ff0c5b DRILL-8020: Add JSON Configuration Options to HTTP Rest Plugin (#2348)
9ff0c5b is described below
commit 9ff0c5bebcaed8066e19c199d9932b136632e2c0
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Wed Nov 3 08:09:03 2021 -0400
DRILL-8020: Add JSON Configuration Options to HTTP Rest Plugin (#2348)
* DRILL-8020: Add JSON Configuration Options to HTTP Rest Plugin
* Addressed review comments
* Addressed Review Comments
* Changed to Boolean
* Removed unused import
---
contrib/storage-http/README.md | 23 +++++++
.../drill/exec/store/http/HttpApiConfig.java | 5 ++
.../drill/exec/store/http/HttpBatchReader.java | 18 +++--
.../drill/exec/store/http/HttpJsonOptions.java | 79 ++++++++++++++++++++++
.../drill/exec/store/http/TestHttpPlugin.java | 40 +++++++++++
.../src/test/resources/data/response2.json | 10 +++
6 files changed, 171 insertions(+), 4 deletions(-)
diff --git a/contrib/storage-http/README.md b/contrib/storage-http/README.md
index 77ffc35..a287f9d 100644
--- a/contrib/storage-http/README.md
+++ b/contrib/storage-http/README.md
@@ -244,6 +244,29 @@ The REST plugin accepts three different types of input: `json`, `csv` and `xml`.
configuration option called `xmlDataLevel` which reduces the level of unneeded nesting found in XML files. You can find more information in the documentation for Drill's XML
format plugin.
+#### JSON Configuration
+Drill has a collection of JSON configuration options that control how Drill interprets JSON data. These are normally set at the global level; however, the HTTP plugin
+allows you to configure these options individually per connection and override the Drill defaults. The options are:
+
+* `allowNanInf`: Configures the connection to interpret `NaN` and `Inf` values
+* `allTextMode`: By default, Drill attempts to infer data types from JSON data. If the data is malformed, Drill may throw schema change exceptions. If your data is
+ inconsistent, you can enable `allTextMode`; when it is `true`, Drill reads all JSON values as strings rather than trying to infer the data type.
+* `readNumbersAsDouble`: By default, Drill attempts to distinguish between integers, floating point numbers and strings. One challenge is that when data is inconsistent, Drill may
+ throw schema change exceptions. In addition to `allTextMode`, you can make Drill less sensitive by setting `readNumbersAsDouble` to `true`, which causes Drill to read all
+ numeric fields in JSON data as the `double` data type rather than trying to distinguish between ints and doubles.
+* `enableEscapeAnyChar`: Allows a user to escape any character with a backslash (`\`).
+
+All of these can be set by adding the `jsonOptions` to your connection configuration as shown below:
+
+```json
+
+"jsonOptions": {
+ "allTextMode": true,
+ "readNumbersAsDouble": true
+}
+
+```
+
#### Authorization
`authType`: If your API requires authentication, specify the authentication
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java
index 75f85f5..cca2767 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpApiConfig.java
@@ -103,6 +103,10 @@ public class HttpApiConfig {
@JsonProperty
private final boolean errorOn400;
+ // Enables the user to configure JSON options at the connection level rather than globally.
+ @JsonProperty
+ private final HttpJsonOptions jsonOptions;
+
@JsonInclude
@JsonProperty
private final boolean verifySSLCert;
@@ -127,6 +131,7 @@ public class HttpApiConfig {
this.method = StringUtils.isEmpty(builder.method)
? HttpMethod.GET.toString() : builder.method.trim().toUpperCase();
this.url = builder.url;
+ this.jsonOptions = builder.jsonOptions;
HttpMethod httpMethod = HttpMethod.valueOf(this.method);
// Get the request method. Only accept GET and POST requests. Anything else will default to GET.
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
index 921210c..59d14af 100644
--- a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpBatchReader.java
@@ -32,11 +32,14 @@ import org.apache.drill.exec.physical.impl.scan.framework.SchemaNegotiator;
import org.apache.drill.exec.physical.resultSet.ResultSetLoader;
import org.apache.drill.exec.store.easy.json.loader.JsonLoader;
import org.apache.drill.exec.store.easy.json.loader.JsonLoaderImpl.JsonLoaderBuilder;
+import org.apache.drill.exec.store.easy.json.loader.JsonLoaderOptions;
import org.apache.drill.exec.store.http.util.HttpProxyConfig;
import org.apache.drill.exec.store.http.util.HttpProxyConfig.ProxyBuilder;
import org.apache.drill.exec.store.http.util.SimpleHttp;
import org.apache.drill.exec.store.security.UsernamePasswordCredentials;
import org.apache.drill.exec.store.ImplicitColumnUtils.ImplicitColumns;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.InputStream;
@@ -47,6 +50,7 @@ public class HttpBatchReader implements ManagedReader<SchemaNegotiator> {
private static final String[] STRING_METADATA_FIELDS = {"_response_message", "_response_protocol", "_response_url"};
private static final String RESPONSE_CODE_FIELD = "_response_code";
+ private static final Logger logger = LoggerFactory.getLogger(HttpBatchReader.class);
private final HttpSubScan subScan;
private final int maxRecords;
@@ -100,14 +104,20 @@ public class HttpBatchReader implements ManagedReader<SchemaNegotiator> {
populateImplicitFieldMap(http);
try {
- jsonLoader = new JsonLoaderBuilder()
+ JsonLoaderBuilder jsonBuilder = new JsonLoaderBuilder()
.implicitFields(implicitColumns)
.resultSetLoader(loader)
- .standardOptions(negotiator.queryOptions())
.dataPath(subScan.tableSpec().connectionConfig().dataPath())
.errorContext(errorContext)
- .fromStream(inStream)
- .build();
+ .fromStream(inStream);
+
+ if (subScan.tableSpec().connectionConfig().jsonOptions() != null) {
+ JsonLoaderOptions jsonOptions = subScan.tableSpec().connectionConfig().jsonOptions().getJsonOptions(negotiator.queryOptions());
+ jsonBuilder.options(jsonOptions);
+ } else {
+ jsonBuilder.standardOptions(negotiator.queryOptions());
+ }
+ jsonLoader = jsonBuilder.build();
} catch (Throwable t) {
// Paranoia: ensure stream is closed if anything goes wrong.
diff --git a/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
new file mode 100644
index 0000000..4928c66
--- /dev/null
+++ b/contrib/storage-http/src/main/java/org/apache/drill/exec/store/http/HttpJsonOptions.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.store.http;
+
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonInclude;
+import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
+import lombok.Builder;
+import lombok.EqualsAndHashCode;
+import lombok.Getter;
+import lombok.ToString;
+import lombok.experimental.Accessors;
+import lombok.extern.slf4j.Slf4j;
+import org.apache.drill.exec.server.options.OptionSet;
+import org.apache.drill.exec.store.easy.json.loader.JsonLoaderOptions;
+
+@Slf4j
+@Builder
+@Getter
+@Accessors(fluent = true)
+@EqualsAndHashCode
+@ToString
+@JsonInclude(JsonInclude.Include.NON_DEFAULT)
+@JsonDeserialize(builder = HttpJsonOptions.HttpJsonOptionsBuilder.class)
+public class HttpJsonOptions { // Optional JSON reader overrides; every field is a nullable Boolean, and null is treated as "not set" by getJsonOptions().
+
+ @JsonInclude
+ private final Boolean allowNanInf; // copied to JsonLoaderOptions.allowNanInf when non-null
+
+ @JsonInclude
+ private final Boolean allTextMode; // copied to JsonLoaderOptions.allTextMode when non-null
+
+ @JsonInclude
+ private final Boolean readNumbersAsDouble; // copied to JsonLoaderOptions.readNumbersAsDouble when non-null
+
+ @JsonInclude
+ private final Boolean enableEscapeAnyChar; // copied to JsonLoaderOptions.enableEscapeAnyChar when non-null
+
+ @JsonIgnore // derived value, not a configuration property of its own
+ public JsonLoaderOptions getJsonOptions(OptionSet optionSet) { // Returns loader options built from optionSet, with each non-null field of this object applied on top.
+
+ JsonLoaderOptions options = new JsonLoaderOptions(optionSet); // baseline comes from the supplied option set
+
+ if (allowNanInf != null) { // null means not configured here: keep the baseline value
+ options.allowNanInf = allowNanInf;
+ }
+
+ if (allTextMode != null) { // same override-if-set rule as above
+ options.allTextMode = allTextMode;
+ }
+
+ if (readNumbersAsDouble != null) { // same override-if-set rule as above
+ options.readNumbersAsDouble = readNumbersAsDouble;
+ }
+
+ if (enableEscapeAnyChar != null) { // same override-if-set rule as above
+ options.enableEscapeAnyChar = enableEscapeAnyChar;
+ }
+
+ return options; // a fresh JsonLoaderOptions instance created by this call
+ }
+
+}
diff --git a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
index 0375c99..0aa2c05 100644
--- a/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
+++ b/contrib/storage-http/src/test/java/org/apache/drill/exec/store/http/TestHttpPlugin.java
@@ -23,6 +23,7 @@ import okhttp3.mockwebserver.MockWebServer;
import okhttp3.mockwebserver.RecordedRequest;
import org.apache.drill.common.exceptions.UserException;
import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.DataMode;
import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.common.util.DrillFileUtils;
import org.apache.drill.exec.physical.rowSet.RowSet;
@@ -71,6 +72,7 @@ public class TestHttpPlugin extends ClusterTest {
private static String TEST_JSON_RESPONSE;
private static String TEST_CSV_RESPONSE;
private static String TEST_XML_RESPONSE;
+ private static String TEST_JSON_RESPONSE_WITH_DATATYPES;
@BeforeClass
public static void setup() throws Exception {
@@ -79,6 +81,7 @@ public class TestHttpPlugin extends ClusterTest {
TEST_JSON_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.json"), Charsets.UTF_8).read();
TEST_CSV_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.csv"), Charsets.UTF_8).read();
TEST_XML_RESPONSE = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response.xml"), Charsets.UTF_8).read();
+ TEST_JSON_RESPONSE_WITH_DATATYPES = Files.asCharSource(DrillFileUtils.getResourceAsFile("/data/response2.json"), Charsets.UTF_8).read();
dirTestWatcher.copyResourceToRoot(Paths.get("data/"));
makeLiveConfig();
@@ -230,6 +233,16 @@ public class TestHttpPlugin extends ClusterTest {
.requireTail(false)
.build();
+ HttpApiConfig mockTableWithJsonOptions = HttpApiConfig.builder()
+ .url("http://localhost:8091/json")
+ .method("GET")
+ .headers(headers)
+ .requireTail(false)
+ .jsonOptions(HttpJsonOptions.builder()
+ .allTextMode(true)
+ .build()
+ )
+ .build();
Map<String, HttpApiConfig> configs = new HashMap<>();
configs.put("sunrise", mockSchema);
@@ -240,6 +253,7 @@ public class TestHttpPlugin extends ClusterTest {
configs.put("github", mockGithubWithParam);
configs.put("github2", mockGithubWithDuplicateParam);
configs.put("github3", mockGithubWithParamInQuery);
+ configs.put("mockJsonAllText", mockTableWithJsonOptions);
HttpStoragePluginConfig mockStorageConfigWithWorkspace =
new HttpStoragePluginConfig(false, configs, 2, "", 80, "", "", "", PlainCredentialsProvider.EMPTY_CREDENTIALS_PROVIDER);
@@ -414,6 +428,32 @@ public class TestHttpPlugin extends ClusterTest {
}
 @Test
+ public void simpleTestWithJsonConfig() throws Exception { // Checks that the mockJsonAllText connection (built with allTextMode(true)) returns every column as VARCHAR.
+ String sql = "SELECT * FROM local.mockJsonAllText";
+
+ try (MockWebServer server = startServer()) {
+ server.enqueue(new MockResponse().setResponseCode(200).setBody(TEST_JSON_RESPONSE_WITH_DATATYPES)); // body is the content of /data/response2.json
+ RowSet results = client.queryBuilder().sql(sql).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("col_1", MinorType.VARCHAR, DataMode.OPTIONAL) // unquoted decimals (1.0, 4.0) in the fixture
+ .add("col_2", MinorType.VARCHAR, DataMode.OPTIONAL) // unquoted integers (2, 5) in the fixture
+ .add("col_3", MinorType.VARCHAR, DataMode.OPTIONAL) // already quoted strings ("3.0", "6.0") in the fixture
+ .build();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("1.0", "2", "3.0")
+ .addRow("4.0", "5", "6.0")
+ .build();
+
+ RowSetUtilities.verify(expected, results);
+ } catch (Exception e) { // NOTE(review): the method already declares throws Exception; this catch prints only the message and discards the stack trace
+ System.out.println(e.getMessage());
+ fail();
+ }
+ }
+
+ @Test
public void simpleTestWithMockServerWithURLParams() throws Exception {
String sql = "SELECT _response_url FROM local.github\n" +
"WHERE `org` = 'apache'";
diff --git a/contrib/storage-http/src/test/resources/data/response2.json b/contrib/storage-http/src/test/resources/data/response2.json
new file mode 100644
index 0000000..467a458
--- /dev/null
+++ b/contrib/storage-http/src/test/resources/data/response2.json
@@ -0,0 +1,10 @@
+[
+ {
+ "col_1": 1.0,
+ "col_2": 2,
+ "col_3": "3.0"
+ },{
+ "col_1": 4.0,
+ "col_2": 5,
+ "col_3": "6.0"
+}]