You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ag...@apache.org on 2019/03/04 08:18:58 UTC

[drill] branch master updated: DRILL-7060: Support JsonParser Feature 'ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER' (#1663)

This is an automated email from the ASF dual-hosted git repository.

agirish pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 8c7de78  DRILL-7060: Support JsonParser Feature 'ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER' (#1663)
8c7de78 is described below

commit 8c7de7838124a00e1b6b786fde2ad8dfd1b0ba9d
Author: Abhishek Girish <ag...@apache.org>
AuthorDate: Mon Mar 4 00:18:52 2019 -0800

    DRILL-7060: Support JsonParser Feature 'ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER' (#1663)
---
 .../java/org/apache/drill/exec/ExecConstants.java  |  8 ++
 .../exec/server/options/SystemOptionManager.java   |  1 +
 .../exec/store/easy/json/JSONRecordReader.java     |  5 +-
 .../store/easy/json/reader/BaseJsonProcessor.java  | 18 ++++-
 .../store/easy/json/reader/CountingJsonReader.java |  4 +-
 .../drill/exec/vector/complex/fn/JsonReader.java   | 11 ++-
 .../java-exec/src/main/resources/drill-module.conf |  1 +
 .../complex/writer/TestJsonEscapeAnyChar.java      | 87 ++++++++++++++++++++++
 8 files changed, 125 insertions(+), 10 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index ac3252a..6fce8b7 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -442,6 +442,14 @@ public final class ExecConstants {
   public static final String JSON_READER_NAN_INF_NUMBERS = "store.json.reader.allow_nan_inf";
   public static final BooleanValidator JSON_READER_NAN_INF_NUMBERS_VALIDATOR = new BooleanValidator(JSON_READER_NAN_INF_NUMBERS,
       new OptionDescription("Enables the JSON record reader in Drill to read `NaN` and `Infinity` tokens in JSON data as numbers. Default is true. (Drill 1.13+)"));
+
+  /**
+   * Json reader option that enables parser to escape any characters
+   */
+  public static final String JSON_READER_ESCAPE_ANY_CHAR = "store.json.reader.allow_escape_any_char";
+  public static final BooleanValidator JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR = new BooleanValidator(JSON_READER_ESCAPE_ANY_CHAR,
+    new OptionDescription("Enables the JSON record reader in Drill to escape any character. Default is false. (Drill 1.16+)"));
+
   /**
    * The column label (for directory levels) in results when querying files in a directory
    * E.g.  labels: dir0   dir1<pre>
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
index 1a9eff3..030e1a3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/options/SystemOptionManager.java
@@ -182,6 +182,7 @@ public class SystemOptionManager extends BaseOptionManager implements AutoClosea
       new OptionDefinition(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR),
       new OptionDefinition(ExecConstants.JSON_WRITER_NAN_INF_NUMBERS_VALIDATOR),
       new OptionDefinition(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR),
+      new OptionDefinition(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR),
       new OptionDefinition(ExecConstants.ENABLE_UNION_TYPE),
       new OptionDefinition(ExecConstants.TEXT_ESTIMATED_ROW_SIZE),
       new OptionDefinition(ExecConstants.JSON_EXTENDED_TYPES),
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
index 62ace66..428a4e1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONRecordReader.java
@@ -61,6 +61,7 @@ public class JSONRecordReader extends AbstractRecordReader {
   private final FragmentContext fragmentContext;
   private final boolean enableAllTextMode;
   private final boolean enableNanInf;
+  private final boolean enableEscapeAnyChar;
   private final boolean readNumbersAsDouble;
   private final boolean unionEnabled;
   private long parseErrorCount;
@@ -115,6 +116,7 @@ public class JSONRecordReader extends AbstractRecordReader {
     // only enable all text mode if we aren't using embedded content mode.
     this.enableAllTextMode = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ALL_TEXT_MODE_VALIDATOR);
     this.enableNanInf = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_NAN_INF_NUMBERS_VALIDATOR);
+    this.enableEscapeAnyChar = fragmentContext.getOptions().getOption(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR_VALIDATOR);
     this.readNumbersAsDouble = embeddedContent == null && fragmentContext.getOptions().getOption(ExecConstants.JSON_READ_NUMBERS_AS_DOUBLE_VALIDATOR);
     this.unionEnabled = embeddedContent == null && fragmentContext.getOptions().getBoolean(ExecConstants.ENABLE_UNION_TYPE_KEY);
     this.skipMalformedJSONRecords = fragmentContext.getOptions().getOption(ExecConstants.JSON_SKIP_MALFORMED_RECORDS_VALIDATOR);
@@ -142,7 +144,7 @@ public class JSONRecordReader extends AbstractRecordReader {
 
       this.writer = new VectorContainerWriter(output, unionEnabled);
       if (isSkipQuery()) {
-        this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf);
+        this.jsonReader = new CountingJsonReader(fragmentContext.getManagedBuffer(), enableNanInf, enableEscapeAnyChar);
       } else {
         this.jsonReader = new JsonReader.Builder(fragmentContext.getManagedBuffer())
             .schemaPathColumns(ImmutableList.copyOf(getColumns()))
@@ -150,6 +152,7 @@ public class JSONRecordReader extends AbstractRecordReader {
             .skipOuterList(true)
             .readNumbersAsDouble(readNumbersAsDouble)
             .enableNanInf(enableNanInf)
+            .enableEscapeAnyChar(enableEscapeAnyChar)
             .build();
       }
       setupParser();
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java
index 48a1464..4b61863 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonProcessor.java
@@ -39,10 +39,17 @@ public abstract class BaseJsonProcessor implements JsonProcessor {
   private static final ObjectMapper DEFAULT_MAPPER = getDefaultMapper();
 
   private static final ObjectMapper NAN_INF_MAPPER = getDefaultMapper()
-      .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
+    .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
+
+  private static final ObjectMapper ESCAPE_ANY_MAPPER = getDefaultMapper()
+    .configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
+
+  private static final ObjectMapper NAN_INF_AND_ESCAPE_ANY_MAPPER = getDefaultMapper()
+    .configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true).configure(JsonParser.Feature.ALLOW_BACKSLASH_ESCAPING_ANY_CHARACTER, true);
 
   private static final String JACKSON_PARSER_EOF_FILE_MSG = "Unexpected end-of-input:";
   private final boolean enableNanInf;
+  private final boolean enableEscapeAnyChar;
 
   public enum JsonExceptionProcessingState {
     END_OF_STREAM, PROC_SUCCEED
@@ -75,15 +82,20 @@ public abstract class BaseJsonProcessor implements JsonProcessor {
     this.ignoreJSONParseErrors = ignoreJSONParseErrors;
   }
 
-  public BaseJsonProcessor(DrillBuf workBuf, boolean enableNanInf) {
+  public BaseJsonProcessor(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar) {
     this.enableNanInf = enableNanInf;
+    this.enableEscapeAnyChar = enableEscapeAnyChar;
     workBuf = Preconditions.checkNotNull(workBuf);
   }
 
   @Override
   public void setSource(InputStream is) throws IOException {
-    if (enableNanInf) {
+    if (enableNanInf && enableEscapeAnyChar) {
+      parser = NAN_INF_AND_ESCAPE_ANY_MAPPER.getFactory().createParser(is);
+    } else if (enableNanInf) {
       parser = NAN_INF_MAPPER.getFactory().createParser(is);
+    } else if (enableEscapeAnyChar){
+      parser = ESCAPE_ANY_MAPPER.getFactory().createParser(is);
     } else {
       parser = DEFAULT_MAPPER.getFactory().createParser(is);
     }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
index 0f92ec5..73b93f4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
@@ -27,8 +27,8 @@ import org.apache.drill.exec.vector.complex.writer.BaseWriter;
 
 public class CountingJsonReader extends BaseJsonProcessor {
 
-  public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf) {
-    super(workBuf, enableNanInf);
+  public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar) {
+    super(workBuf, enableNanInf, enableEscapeAnyChar);
   }
 
   @Override
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
index aaa9806..34b1f80 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
@@ -74,7 +74,7 @@ public class JsonReader extends BaseJsonProcessor {
   private FieldSelection selection;
 
   private JsonReader(Builder builder) {
-    super(builder.managedBuf, builder.enableNanInf);
+    super(builder.managedBuf, builder.enableNanInf, builder.enableEscapeAnyChar);
     selection = FieldSelection.getFieldSelection(builder.columns);
     workingBuffer = builder.workingBuffer;
     skipOuterList = builder.skipOuterList;
@@ -97,6 +97,7 @@ public class JsonReader extends BaseJsonProcessor {
     private  boolean skipOuterList;
     private  boolean allTextMode;
     private  boolean enableNanInf;
+    private  boolean enableEscapeAnyChar;
 
 
     public Builder(DrillBuf managedBuf) {
@@ -104,9 +105,6 @@ public class JsonReader extends BaseJsonProcessor {
       this.workingBuffer = new WorkingBuffer(managedBuf);
       this.mapOutput = new MapVectorOutput(workingBuffer);
       this.listOutput = new ListVectorOutput(workingBuffer);
-      this.readNumbersAsDouble = false;
-      this.skipOuterList = false;
-      this.allTextMode = false;
       this.enableNanInf = true;
     }
 
@@ -130,6 +128,11 @@ public class JsonReader extends BaseJsonProcessor {
       return this;
     }
 
+    public Builder enableEscapeAnyChar(boolean enableEscapeAnyChar) {
+      this.enableEscapeAnyChar = enableEscapeAnyChar;
+      return this;
+    }
+
     public Builder defaultSchemaPathColumns() {
       this.columns = GroupScan.ALL_COLUMNS;
       return this;
diff --git a/exec/java-exec/src/main/resources/drill-module.conf b/exec/java-exec/src/main/resources/drill-module.conf
index 386b8c2..4f6fbb2 100644
--- a/exec/java-exec/src/main/resources/drill-module.conf
+++ b/exec/java-exec/src/main/resources/drill-module.conf
@@ -606,6 +606,7 @@ drill.exec.options: {
     store.json.all_text_mode: false,
     store.json.writer.allow_nan_inf: true,
     store.json.reader.allow_nan_inf: true,
+    store.json.reader.allow_escape_any_char: false,
     store.json.extended_types: false,
     store.json.read_numbers_as_double: false,
     store.json.reader.print_skipped_invalid_record_number: false,
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java
new file mode 100644
index 0000000..323da27
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector.complex.writer;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.drill.common.exceptions.UserRemoteException;
+import org.apache.drill.exec.ExecConstants;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterTest;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertThat;
+
+public class TestJsonEscapeAnyChar extends ClusterTest {
+
+  private File testFile;
+  private static final String TABLE = "escape.json";
+  private static final String JSON_DATA = "{\"name\": \"ABC\\S\"}";
+  private static final String QUERY = String.format("select * from dfs.`%s`", TABLE);
+
+  @Before
+  public void setup() throws Exception {
+    startCluster(ClusterFixture.builder(dirTestWatcher));
+    testFile = new File(dirTestWatcher.getRootDir(), TABLE);
+    FileUtils.writeStringToFile(testFile, JSON_DATA);
+  }
+
+  @Test
+  public void testwithOptionEnabled() throws Exception {
+
+    try {
+      enableJsonReaderEscapeAnyChar();
+      testBuilder()
+        .sqlQuery(QUERY)
+        .unOrdered()
+        .baselineColumns("name")
+        .baselineValues("ABCS")
+        .build()
+        .run();
+    } finally {
+      resetJsonReaderEscapeAnyChar();
+    }
+  }
+
+  @Test
+  public void testwithOptionDisabled() throws Exception {
+    try {
+      queryBuilder().sql(QUERY)
+        .run();
+    } catch (UserRemoteException e) {
+      assertThat(e.getMessage(), containsString("DATA_READ ERROR: Error parsing JSON - Unrecognized character escape"));
+    }
+  }
+
+  private void enableJsonReaderEscapeAnyChar() {
+    client.alterSession(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR, true);
+  }
+
+  private void resetJsonReaderEscapeAnyChar() {
+    client.alterSession(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR, false);
+  }
+
+  @After
+  public void teardown() throws Exception {
+    FileUtils.deleteQuietly(testFile);
+  }
+}
\ No newline at end of file