You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@drill.apache.org by "ASF GitHub Bot (Jira)" <ji...@apache.org> on 2021/12/03 00:19:00 UTC

[jira] [Commented] (DRILL-8037) Add V2 JSON Format Plugin based on EVF

    [ https://issues.apache.org/jira/browse/DRILL-8037?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17452663#comment-17452663 ] 

ASF GitHub Bot commented on DRILL-8037:
---------------------------------------

vdiravka commented on a change in pull request #2364:
URL: https://github.com/apache/drill/pull/2364#discussion_r761559504



##########
File path: exec/java-exec/src/main/resources/drill-module.conf
##########
@@ -692,6 +692,7 @@ drill.exec.options: {
     # Property name and value should be separated by =.
     # Properties should be separated by new line (\n).
     store.hive.conf.properties: "",
+    store.json.enable_v2_reader: true,

Review comment:
       yes, we want it enabled by default

##########
File path: exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
##########
@@ -231,21 +331,23 @@ public void testCountingQueryNotSkippingInvalidJSONRecords() throws Exception {
   /* Test for JSONReader */
   public void testNotCountingQuerySkippingInvalidJSONRecords() throws Exception {
     try {
-
       String set = "alter session set `"
-          + ExecConstants.JSON_READER_SKIP_INVALID_RECORDS_FLAG + "` = true";
+        + ExecConstants.JSON_READER_SKIP_INVALID_RECORDS_FLAG + "` = true";
       String set1 = "alter session set `"
-          + ExecConstants.JSON_READER_PRINT_INVALID_RECORDS_LINE_NOS_FLAG
-          + "` = true";
+        + ExecConstants.JSON_READER_PRINT_INVALID_RECORDS_LINE_NOS_FLAG
+        + "` = true";
       String query = "select sum(balance) from cp.`jsoninput/drill4653/file.json`";
       testNoResult(set);
       testNoResult(set1);
-      testBuilder().unOrdered().sqlQuery(query).sqlBaselineQuery(query).build()
-          .run();
+      testBuilder()
+        .unOrdered()
+        .sqlQuery(query)
+        .sqlBaselineQuery(query)
+        .go();
     }
     finally {
       String set = "alter session set `"
-          + ExecConstants.JSON_READER_SKIP_INVALID_RECORDS_FLAG + "` = false";
+        + ExecConstants.JSON_READER_SKIP_INVALID_RECORDS_FLAG + "` = false";

Review comment:
       What changes?

##########
File path: exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestExtendedTypes.java
##########
@@ -26,11 +26,11 @@
 import org.apache.drill.test.BaseTestQuery;
 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.rpc.user.QueryDataBatch;
-import org.junit.Assert;
 import org.junit.BeforeClass;
 import org.junit.Test;
 
 public class TestExtendedTypes extends BaseTestQuery {
+

Review comment:
       Spaces? Could you point me to what cleanup you mean?

##########
File path: exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestExtendedTypes.java
##########
@@ -78,10 +78,10 @@ public void testMongoExtendedTypes() throws Exception {
       List<QueryDataBatch> resultList = testSqlWithResults(String.format("select * from dfs.`%s`", originalFile));
       String actual = getResultString(resultList, ",");
       String expected = "drill_timestamp_millies,bin,bin1\n2015-07-07 03:59:43.488,drill,drill\n";
-      Assert.assertEquals(expected, actual);
+      assertEquals(expected, actual);

Review comment:
       Do you mean to add test cases for every extended type from the file?

##########
File path: exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/JSONFormatPlugin.java
##########
@@ -19,64 +19,82 @@
 
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.Objects;
 
-import org.apache.drill.common.PlanStringBuilder;
+import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.expression.SchemaPath;
-import org.apache.drill.common.logical.FormatPluginConfig;
 import org.apache.drill.common.logical.StoragePluginConfig;
+import org.apache.drill.common.types.TypeProtos.MinorType;
+import org.apache.drill.common.types.Types;
 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.ops.FragmentContext;
 import org.apache.drill.exec.ops.QueryContext.SqlStatementType;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileReaderFactory;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileScanBuilder;
+import org.apache.drill.exec.physical.impl.scan.file.FileScanFramework.FileSchemaNegotiator;
+import org.apache.drill.exec.physical.impl.scan.framework.ManagedReader;
 import org.apache.drill.exec.planner.common.DrillStatsTable;
 import org.apache.drill.exec.planner.common.DrillStatsTable.TableStatistics;
 import org.apache.drill.exec.proto.ExecProtos.FragmentHandle;
 import org.apache.drill.exec.server.DrillbitContext;
+import org.apache.drill.exec.server.options.OptionSet;
 import org.apache.drill.exec.store.RecordReader;
 import org.apache.drill.exec.store.RecordWriter;
 import org.apache.drill.exec.store.StatisticsRecordWriter;
 import org.apache.drill.exec.store.dfs.DrillFileSystem;
 import org.apache.drill.exec.store.dfs.easy.EasyFormatPlugin;
+import org.apache.drill.exec.store.dfs.easy.EasySubScan;
 import org.apache.drill.exec.store.dfs.easy.EasyWriter;
 import org.apache.drill.exec.store.dfs.easy.FileWork;
-import org.apache.drill.exec.store.easy.json.JSONFormatPlugin.JSONFormatConfig;
-import org.apache.drill.shaded.guava.com.google.common.collect.ImmutableList;
-import org.apache.drill.shaded.guava.com.google.common.collect.Maps;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import com.fasterxml.jackson.annotation.JsonCreator;
-import com.fasterxml.jackson.annotation.JsonInclude;
-import com.fasterxml.jackson.annotation.JsonProperty;
-import com.fasterxml.jackson.annotation.JsonTypeName;
 import com.fasterxml.jackson.core.JsonFactory;
 import com.fasterxml.jackson.core.JsonGenerator;
 import com.fasterxml.jackson.databind.ObjectMapper;
 
+import static org.apache.drill.exec.store.easy.json.JSONFormatConfig.PLUGIN_NAME;
+
 public class JSONFormatPlugin extends EasyFormatPlugin<JSONFormatConfig> {
 
   private static final Logger logger = LoggerFactory.getLogger(JSONFormatPlugin.class);
-  public static final String DEFAULT_NAME = "json";
-
   private static final boolean IS_COMPRESSIBLE = true;
 
-  public static final String OPERATOR_TYPE = "JSON_SUB_SCAN";
+  public static final String READER_OPERATOR_TYPE = "JSON_SUB_SCAN";
+  public static final String WRITER_OPERATOR_TYPE = "JSON_WRITER";
 
   public JSONFormatPlugin(String name, DrillbitContext context,
       Configuration fsConf, StoragePluginConfig storageConfig) {
     this(name, context, fsConf, storageConfig, new JSONFormatConfig(null));
   }
 
-  public JSONFormatPlugin(String name, DrillbitContext context,
-      Configuration fsConf, StoragePluginConfig config, JSONFormatConfig formatPluginConfig) {
-    super(name, context, fsConf, config, formatPluginConfig, true,
-          false, false, IS_COMPRESSIBLE, formatPluginConfig.getExtensions(), DEFAULT_NAME);
+  public JSONFormatPlugin(String name, DrillbitContext context, Configuration fsConf,
+      StoragePluginConfig config, JSONFormatConfig formatPluginConfig) {
+    super(name, easyConfig(fsConf, formatPluginConfig), context, config, formatPluginConfig);
+  }
+
+  private static EasyFormatConfig easyConfig(Configuration fsConf, JSONFormatConfig pluginConfig) {
+    return EasyFormatConfig.builder()
+      .readable(true)
+      .writable(true)
+      .blockSplittable(false)
+      .compressible(IS_COMPRESSIBLE)
+      .supportsProjectPushdown(true)
+      .extensions(pluginConfig.getExtensions())
+      .fsConf(fsConf)
+      .defaultName(PLUGIN_NAME)
+      .readerOperatorType(READER_OPERATOR_TYPE)
+      .writerOperatorType(WRITER_OPERATOR_TYPE)

Review comment:
       It is enabled by default. So what is temporary here?

##########
File path: exec/java-exec/src/test/java/org/apache/drill/exec/vector/complex/writer/TestJsonEscapeAnyChar.java
##########
@@ -80,6 +112,14 @@ private void resetJsonReaderEscapeAnyChar() {
     client.alterSession(ExecConstants.JSON_READER_ESCAPE_ANY_CHAR, false);
   }
 
+  private void enableV2Reader(boolean enable) {
+    client.alterSession(ExecConstants.ENABLE_V2_JSON_READER_KEY, enable);
+  }
+
+  private void resetV2Reader() {
+    client.resetSession(ExecConstants.ENABLE_V2_JSON_READER_KEY);
+  }
+

Review comment:
       What changes?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@drill.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


> Add V2 JSON Format Plugin based on EVF
> --------------------------------------
>
>                 Key: DRILL-8037
>                 URL: https://issues.apache.org/jira/browse/DRILL-8037
>             Project: Apache Drill
>          Issue Type: Sub-task
>            Reporter: Vitalii Diravka
>            Assignee: Vitalii Diravka
>            Priority: Major
>
> This adds new V2 beta JSON Format Plugin based on the "Extended Vector Framework".
> This is a follow-up to DRILL-6953 (which was closed with the decision to merge it in small pieces).
> So it is based on [https://github.com/apache/drill/pull/1913] and [https://github.com/paul-rogers/drill/tree/DRILL-6953-rev2] work.



--
This message was sent by Atlassian Jira
(v8.20.1#820001)