Posted to commits@drill.apache.org by gp...@apache.org on 2019/09/08 03:00:10 UTC

[drill] branch master updated (b30830a -> 65df1fd)

This is an automated email from the ASF dual-hosted git repository.

gparai pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git.


    from b30830a  DRILL-7096: Develop vector for canonical Map<K,V>
     new bd6d7b1  DRILL-7343: Add User-Agent UDFs to Drill
     new 797524d  DRILL-7362: COUNT(*) on JSON with outer list results in JsonParse error
     new f8bc0db  DRILL-7367: Remove Server details from response headers
     new 65df1fd  DRILL-7369: Schema for MaprDB tables is not used for the case when several fields are queried

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 contrib/udfs/README.md                             |  56 +++++++
 contrib/udfs/pom.xml                               |   5 +
 .../apache/drill/exec/udfs/UserAgentFunctions.java | 172 +++++++++++++++++++++
 .../drill/exec/udfs/TestUserAgentFunctions.java    | 171 ++++++++++++++++++++
 .../drill/yarn/appMaster/http/WebServer.java       |  40 ++---
 .../apache/drill/exec/server/rest/WebServer.java   |  31 ++--
 .../store/easy/json/reader/BaseJsonReader.java     | 167 ++++++++++++++++++++
 .../store/easy/json/reader/CountingJsonReader.java |  47 ++----
 .../drill/exec/vector/complex/fn/JsonReader.java   | 126 ++-------------
 .../exec/vector/complex/fn/JsonReaderUtils.java    |   6 +-
 .../exec/store/json/TestJsonRecordReader.java      |  13 ++
 11 files changed, 644 insertions(+), 190 deletions(-)
 create mode 100644 contrib/udfs/README.md
 create mode 100644 contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java
 create mode 100644 contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java
 create mode 100644 exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonReader.java


[drill] 03/04: DRILL-7367: Remove Server details from response headers

Posted by gp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit f8bc0db29f08ec9f9ff082bec202a2ab2495bac1
Author: Arina Ielchiieva <ar...@gmail.com>
AuthorDate: Thu Sep 5 17:04:23 2019 +0300

    DRILL-7367: Remove Server details from response headers
    
    closes #1851
---
 .../drill/yarn/appMaster/http/WebServer.java       | 40 ++++++++--------------
 .../apache/drill/exec/server/rest/WebServer.java   | 31 ++++++++++-------
 2 files changed, 33 insertions(+), 38 deletions(-)

diff --git a/drill-yarn/src/main/java/org/apache/drill/yarn/appMaster/http/WebServer.java b/drill-yarn/src/main/java/org/apache/drill/yarn/appMaster/http/WebServer.java
index 5ba31bc..75d99d9 100644
--- a/drill-yarn/src/main/java/org/apache/drill/yarn/appMaster/http/WebServer.java
+++ b/drill-yarn/src/main/java/org/apache/drill/yarn/appMaster/http/WebServer.java
@@ -48,7 +48,6 @@ import org.bouncycastle.cert.jcajce.JcaX509v3CertificateBuilder;
 import org.bouncycastle.operator.ContentSigner;
 import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder;
 import org.eclipse.jetty.http.HttpVersion;
-import org.eclipse.jetty.security.ConstraintMapping;
 import org.eclipse.jetty.security.ConstraintSecurityHandler;
 import org.eclipse.jetty.security.DefaultIdentityService;
 import org.eclipse.jetty.security.DefaultUserIdentity;
@@ -109,7 +108,7 @@ public class WebServer implements AutoCloseable {
   /**
    * Start the web server including setup.
    *
-   * @throws Exception
+   * @throws Exception in case of error during start
    */
   public void start() throws Exception {
     if (jettyServer == null) {
@@ -223,7 +222,7 @@ public class WebServer implements AutoCloseable {
   }
 
   public static class AMUserPrincipal implements Principal {
-    public final String userName;
+    private final String userName;
 
     public AMUserPrincipal(String userName) {
       this.userName = userName;
@@ -236,7 +235,7 @@ public class WebServer implements AutoCloseable {
   }
 
   public static class AmLoginService implements LoginService {
-    private AMSecurityManager securityMgr;
+    private final AMSecurityManager securityMgr;
     protected IdentityService identityService = new DefaultIdentityService();
 
     public AmLoginService(AMSecurityManager securityMgr) {
@@ -274,18 +273,6 @@ public class WebServer implements AutoCloseable {
     @Override
     public void logout(UserIdentity user) {
     }
-
-    // @Override
-    // protected UserIdentity loadUser(String username) {
-    // // TODO Auto-generated method stub
-    // return null;
-    // }
-    //
-    // @Override
-    // protected void loadUsers() throws IOException {
-    // putUser( "fred", new Password( "wilma" ), new String[] { ADMIN_ROLE } );
-    // }
-
   }
 
   /**
@@ -298,8 +285,7 @@ public class WebServer implements AutoCloseable {
     ConstraintSecurityHandler security = new ConstraintSecurityHandler();
 
     Set<String> knownRoles = ImmutableSet.of(ADMIN_ROLE);
-    security.setConstraintMappings(Collections.<ConstraintMapping> emptyList(),
-        knownRoles);
+    security.setConstraintMappings(Collections.emptyList(), knownRoles);
 
     security.setAuthenticator(new FormAuthenticator("/login", "/login", true));
     security
@@ -350,13 +336,11 @@ public class WebServer implements AutoCloseable {
    * Create HTTP connector.
    *
    * @return Initialized {@link ServerConnector} instance for HTTP connections.
-   * @throws Exception
    */
-  private ServerConnector createHttpConnector(Config config) throws Exception {
+  private ServerConnector createHttpConnector(Config config) {
     LOG.info("Setting up HTTP connector for web server");
-    final HttpConfiguration httpConfig = new HttpConfiguration();
     final ServerConnector httpConnector = new ServerConnector(jettyServer,
-        new HttpConnectionFactory(httpConfig));
+        new HttpConnectionFactory(baseHttpConfig()));
     httpConnector.setPort(config.getInt(DrillOnYarnConfig.HTTP_PORT));
 
     return httpConnector;
@@ -368,12 +352,12 @@ public class WebServer implements AutoCloseable {
    * certificate is generated and used.
    * <p>
    * This is a shameless copy of
-   * {@link org.apache.drill.exec.server.rest.WebServer#createHttpsConnector(int, int, int)}.
+   * org.apache.drill.exec.server.rest.WebServer#createHttpsConnector(int, int, int).
    * The two should be merged at some point. The primary issue is that the Drill
    * version is tightly coupled to Drillbit configuration.
    *
    * @return Initialized {@link ServerConnector} for HTTPS connections.
-   * @throws Exception
+   * @throws Exception when unable to create HTTPS connector
    */
   private ServerConnector createHttpsConnector(Config config) throws Exception {
     LOG.info("Setting up HTTPS connector for web server");
@@ -446,7 +430,7 @@ public class WebServer implements AutoCloseable {
     sslContextFactory.setKeyStorePassword(keyStorePasswd);
     // }
 
-    final HttpConfiguration httpsConfig = new HttpConfiguration();
+    final HttpConfiguration httpsConfig = baseHttpConfig();
     httpsConfig.addCustomizer(new SecureRequestCustomizer());
 
     // SSL Connector
@@ -459,6 +443,12 @@ public class WebServer implements AutoCloseable {
     return sslConnector;
   }
 
+  private HttpConfiguration baseHttpConfig() {
+    HttpConfiguration httpConfig = new HttpConfiguration();
+    httpConfig.setSendServerVersion(false);
+    return httpConfig;
+  }
+
   @Override
   public void close() throws Exception {
     if (jettyServer != null) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/WebServer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/WebServer.java
index b912a4c..bc093ad 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/WebServer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/WebServer.java
@@ -101,7 +101,7 @@ import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 /**
- * Wrapper class around jetty based webserver.
+ * Wrapper class around jetty based web server.
  */
 public class WebServer implements AutoCloseable {
   private static final String ACE_MODE_SQL_TEMPLATE_JS = "ace.mode-sql.template.js";
@@ -270,7 +270,7 @@ public class WebServer implements AutoCloseable {
   /**
    * It creates A {@link SessionHandler} which contains a {@link HashSessionManager}
    *
-   * @param securityHandler Set of initparameters that are used by the Authentication
+   * @param securityHandler Set of init parameters that are used by the Authentication
    * @return session handler
    */
   private SessionHandler createSessionHandler(final SecurityHandler securityHandler) {
@@ -354,7 +354,7 @@ public class WebServer implements AutoCloseable {
         .initializeSSLContext(false)
         .validateKeyStore(true)
         .build();
-    if(ssl.isSslValid()){
+    if (ssl.isSslValid()) {
       logger.info("Using configured SSL settings for web server");
 
       sslContextFactory.setKeyStorePath(ssl.getKeyStorePath());
@@ -419,7 +419,7 @@ public class WebServer implements AutoCloseable {
       sslContextFactory.setKeyStorePassword(keyStorePasswd);
     }
 
-    final HttpConfiguration httpsConfig = new HttpConfiguration();
+    final HttpConfiguration httpsConfig = baseHttpConfig();
     httpsConfig.addCustomizer(new SecureRequestCustomizer());
 
     // SSL Connector
@@ -439,14 +439,19 @@ public class WebServer implements AutoCloseable {
    */
   private ServerConnector createHttpConnector(int port, int acceptors, int selectors) {
     logger.info("Setting up HTTP connector for web server");
-    final HttpConfiguration httpConfig = new HttpConfiguration();
     final ServerConnector httpConnector =
-        new ServerConnector(embeddedJetty, null, null, null, acceptors, selectors, new HttpConnectionFactory(httpConfig));
+        new ServerConnector(embeddedJetty, null, null, null, acceptors, selectors, new HttpConnectionFactory(baseHttpConfig()));
     httpConnector.setPort(port);
 
     return httpConnector;
   }
 
+  private HttpConfiguration baseHttpConfig() {
+    HttpConfiguration httpConfig = new HttpConfiguration();
+    httpConfig.setSendServerVersion(false);
+    return httpConfig;
+  }
+
   @Override
   public void close() throws Exception {
     if (embeddedJetty != null) {
@@ -458,7 +463,7 @@ public class WebServer implements AutoCloseable {
 
   /**
    * Creates if not exists, and returns File for temporary Javascript directory
-   * @return File handle
+   * @return file handle
    */
   public File getOrCreateTmpJavaScriptDir() {
     if (tmpJavaScriptDir == null && this.drillbit.getContext() != null) {
@@ -468,7 +473,7 @@ public class WebServer implements AutoCloseable {
         generateOptionsDescriptionJSFile();
         generateFunctionJS();
       } catch (IOException e) {
-        logger.error("Unable to create temp dir for JavaScripts. {}", e);
+        logger.error("Unable to create temp dir for JavaScripts: {}", tmpJavaScriptDir.getPath(), e);
       }
     }
     return tmpJavaScriptDir;
@@ -477,7 +482,7 @@ public class WebServer implements AutoCloseable {
 
   /**
    * Generate Options Description JavaScript to serve http://drillhost/options ACE library search features
-   * @throws IOException
+   * @throws IOException when unable to generate functions JS file
    */
   private void generateOptionsDescriptionJSFile() throws IOException {
     // Obtain list of Options & their descriptions
@@ -491,12 +496,12 @@ public class WebServer implements AutoCloseable {
     int numLeftToWrite = options.size();
 
     // Template source Javascript file
-    InputStream optionsDescripTemplateStream = Resource.newClassPathResource(OPTIONS_DESCRIBE_TEMPLATE_JS).getInputStream();
+    InputStream optionsDescribeTemplateStream = Resource.newClassPathResource(OPTIONS_DESCRIBE_TEMPLATE_JS).getInputStream();
     // Generated file
     File optionsDescriptionFile = new File(getOrCreateTmpJavaScriptDir(), OPTIONS_DESCRIBE_JS);
     final String file_content_footer = "};";
     // Create a copy of a template and write with that!
-    java.nio.file.Files.copy(optionsDescripTemplateStream, optionsDescriptionFile.toPath());
+    java.nio.file.Files.copy(optionsDescribeTemplateStream, optionsDescriptionFile.toPath());
     logger.info("Will write {} descriptions to {}", numLeftToWrite, optionsDescriptionFile.getAbsolutePath());
 
     try (BufferedWriter writer = new BufferedWriter(new FileWriter(optionsDescriptionFile, true))) {
@@ -521,7 +526,7 @@ public class WebServer implements AutoCloseable {
 
   /**
    * Generates ACE library javascript populated with list of available SQL functions
-   * @throws IOException
+   * @throws IOException when unable to generate JS file with functions
    */
   private void generateFunctionJS() throws IOException {
     // Naturally ordered set of function names
@@ -530,7 +535,7 @@ public class WebServer implements AutoCloseable {
     List<FunctionHolder> builtInFuncHolderList = this.drillbit.getContext().getFunctionImplementationRegistry().getLocalFunctionRegistry()
         .getAllJarsWithFunctionsHolders().get(LocalFunctionRegistry.BUILT_IN);
 
-    // Build List of 'usable' functions (i.e. functions that start with an alphabet and can be autocompleted by the ACE library)
+    // Build List of 'usable' functions (i.e. functions that start with an alphabet and can be auto-completed by the ACE library)
     // Example of 'unusable' functions would be operators like '<', '!'
     int skipCount = 0;
     for (FunctionHolder builtInFunctionHolder : builtInFuncHolderList) {
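
The heart of this change is the new baseHttpConfig() helper shared by both the HTTP and HTTPS connectors: Jetty's HttpConfiguration.setSendServerVersion(false) stops the connector from emitting the "Server: Jetty(...)" response header. A minimal standalone sketch of the same idea outside of Drill's WebServer (class name and port are placeholders, not part of this commit):

    import org.eclipse.jetty.server.HttpConfiguration;
    import org.eclipse.jetty.server.HttpConnectionFactory;
    import org.eclipse.jetty.server.Server;
    import org.eclipse.jetty.server.ServerConnector;

    public class NoServerHeaderSketch {
      public static void main(String[] args) throws Exception {
        Server server = new Server();

        // Same idea as baseHttpConfig() above: do not advertise the Jetty
        // version in the "Server" response header.
        HttpConfiguration httpConfig = new HttpConfiguration();
        httpConfig.setSendServerVersion(false);

        ServerConnector http =
            new ServerConnector(server, new HttpConnectionFactory(httpConfig));
        http.setPort(8047); // placeholder port
        server.addConnector(http);

        server.start();
        server.join();
      }
    }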


[drill] 01/04: DRILL-7343: Add User-Agent UDFs to Drill

Posted by gp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit bd6d7b11ff3f687f7257b86b17b8fe9e2e134b43
Author: Charles Givre <cg...@apache.org>
AuthorDate: Thu Sep 5 10:29:01 2019 -0400

    DRILL-7343: Add User-Agent UDFs to Drill
    
    closes #1840
---
 contrib/udfs/README.md                             |  56 +++++++
 contrib/udfs/pom.xml                               |   5 +
 .../apache/drill/exec/udfs/UserAgentFunctions.java | 172 +++++++++++++++++++++
 .../drill/exec/udfs/TestUserAgentFunctions.java    | 171 ++++++++++++++++++++
 4 files changed, 404 insertions(+)

diff --git a/contrib/udfs/README.md b/contrib/udfs/README.md
new file mode 100644
index 0000000..c0950e7
--- /dev/null
+++ b/contrib/udfs/README.md
@@ -0,0 +1,56 @@
+# Drill User Defined Functions
+
+This `README` documents functions which users have submitted to Apache Drill.  
+
+## User Agent Functions
+Drill UDF for parsing User Agent Strings.
+This function is based on Niels Basjes' Java library for parsing user agent strings, which is available here: <https://github.com/nielsbasjes/yauaa>.
+
+### Usage
+The function `parse_user_agent()` takes a user agent string as an argument and returns a map of the available fields. Note that not every field will be present in every user agent string. 
+```
+SELECT parse_user_agent( columns[0] ) as ua 
+FROM dfs.`/tmp/data/drill-httpd/ua.csv`;
+```
+The query above returns:
+```
+{
+  "DeviceClass":"Desktop",
+  "DeviceName":"Macintosh",
+  "DeviceBrand":"Apple",
+  "OperatingSystemClass":"Desktop",
+  "OperatingSystemName":"Mac OS X",
+  "OperatingSystemVersion":"10.10.1",
+  "OperatingSystemNameVersion":"Mac OS X 10.10.1",
+  "LayoutEngineClass":"Browser",
+  "LayoutEngineName":"Blink",
+  "LayoutEngineVersion":"39.0",
+  "LayoutEngineVersionMajor":"39",
+  "LayoutEngineNameVersion":"Blink 39.0",
+  "LayoutEngineNameVersionMajor":"Blink 39",
+  "AgentClass":"Browser",
+  "AgentName":"Chrome",
+  "AgentVersion":"39.0.2171.99",
+  "AgentVersionMajor":"39",
+  "AgentNameVersion":"Chrome 39.0.2171.99",
+  "AgentNameVersionMajor":"Chrome 39",
+  "DeviceCpu":"Intel"
+}
+```
+The function returns a Drill map, so you can access any of the fields using Drill's table.map.key notation. For example, the query below illustrates how to extract a field from this map and summarize it:
+
+```
+SELECT uadata.ua.AgentNameVersion AS Browser,
+COUNT( * ) AS BrowserCount
+FROM (
+   SELECT parse_user_agent( columns[0] ) AS ua
+   FROM dfs.drillworkshop.`user-agents.csv`
+) AS uadata
+GROUP BY uadata.ua.AgentNameVersion
+ORDER BY BrowserCount DESC
+```
+The function can also be called with an optional second argument naming the desired field, i.e.:
+```
+SELECT parse_user_agent( `user_agent`, 'AgentName' ) as AgentName ...
+```
+which will just return the requested field. If the user agent string is empty, all fields will have the value of `Hacker`.  
diff --git a/contrib/udfs/pom.xml b/contrib/udfs/pom.xml
index 38f7dfa..0c0f775 100644
--- a/contrib/udfs/pom.xml
+++ b/contrib/udfs/pom.xml
@@ -63,6 +63,11 @@
       <artifactId>proj4j</artifactId>
       <version>0.1.0</version>
     </dependency>
+    <dependency>
+      <groupId>nl.basjes.parse.useragent</groupId>
+      <artifactId>yauaa</artifactId>
+      <version>5.11</version>
+    </dependency>
 
     <!-- Test dependencies -->
     <dependency>
diff --git a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java
new file mode 100644
index 0000000..f684a2d
--- /dev/null
+++ b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java
@@ -0,0 +1,172 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.udfs;
+
+import io.netty.buffer.DrillBuf;
+import org.apache.drill.exec.expr.DrillSimpleFunc;
+import org.apache.drill.exec.expr.annotations.FunctionTemplate;
+import org.apache.drill.exec.expr.annotations.Output;
+import org.apache.drill.exec.expr.annotations.Param;
+import org.apache.drill.exec.expr.annotations.Workspace;
+import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
+import org.apache.drill.exec.expr.holders.VarCharHolder;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+
+import javax.inject.Inject;
+
+public class UserAgentFunctions {
+
+  @FunctionTemplate(name = "parse_user_agent",
+    scope = FunctionTemplate.FunctionScope.SIMPLE
+  )
+  public static class UserAgentFunction implements DrillSimpleFunc {
+    @Param
+    VarCharHolder input;
+
+    @Output
+    BaseWriter.ComplexWriter outWriter;
+
+    @Inject
+    DrillBuf outBuffer;
+
+    @Workspace
+    nl.basjes.parse.useragent.UserAgentAnalyzerDirect uaa;
+
+    public void setup() {
+      uaa = nl.basjes.parse.useragent.UserAgentAnalyzerDirect.newBuilder().dropTests().hideMatcherLoadStats().build();
+      uaa.getAllPossibleFieldNamesSorted();
+    }
+
+    public void eval() {
+      org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter queryMapWriter = outWriter.rootAsMap();
+
+      String userAgentString = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(input);
+
+      nl.basjes.parse.useragent.UserAgent agent = uaa.parse(userAgentString);
+
+      for (String fieldName : agent.getAvailableFieldNamesSorted()) {
+
+        org.apache.drill.exec.expr.holders.VarCharHolder rowHolder = new org.apache.drill.exec.expr.holders.VarCharHolder();
+        String field = agent.getValue(fieldName);
+
+        byte[] rowStringBytes = field.getBytes();
+        outBuffer.reallocIfNeeded(rowStringBytes.length);
+        outBuffer.setBytes(0, rowStringBytes);
+
+        rowHolder.start = 0;
+        rowHolder.end = rowStringBytes.length;
+        rowHolder.buffer = outBuffer;
+
+        queryMapWriter.varChar(fieldName).write(rowHolder);
+      }
+    }
+  }
+
+  @FunctionTemplate(name = "parse_user_agent",
+    scope = FunctionTemplate.FunctionScope.SIMPLE
+  )
+  public static class NullableUserAgentFunction implements DrillSimpleFunc {
+    @Param
+    NullableVarCharHolder input;
+
+    @Output
+    BaseWriter.ComplexWriter outWriter;
+
+    @Inject
+    DrillBuf outBuffer;
+
+    @Workspace
+    nl.basjes.parse.useragent.UserAgentAnalyzerDirect uaa;
+
+    public void setup() {
+      uaa = nl.basjes.parse.useragent.UserAgentAnalyzerDirect.newBuilder().dropTests().hideMatcherLoadStats().build();
+      uaa.getAllPossibleFieldNamesSorted();
+    }
+
+    public void eval() {
+      org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter queryMapWriter = outWriter.rootAsMap();
+      if (input.isSet == 0) {
+        // Return empty map
+        queryMapWriter.start();
+        queryMapWriter.end();
+        return;
+      }
+      String userAgentString = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(input);
+
+      nl.basjes.parse.useragent.UserAgent agent = uaa.parse(userAgentString);
+
+      for (String fieldName : agent.getAvailableFieldNamesSorted()) {
+
+        org.apache.drill.exec.expr.holders.VarCharHolder rowHolder = new org.apache.drill.exec.expr.holders.VarCharHolder();
+        String field = agent.getValue(fieldName);
+
+        byte[] rowStringBytes = field.getBytes();
+        outBuffer.reallocIfNeeded(rowStringBytes.length);
+        outBuffer.setBytes(0, rowStringBytes);
+
+        rowHolder.start = 0;
+        rowHolder.end = rowStringBytes.length;
+        rowHolder.buffer = outBuffer;
+
+        queryMapWriter.varChar(fieldName).write(rowHolder);
+      }
+    }
+  }
+
+  @FunctionTemplate(name = "parse_user_agent",
+    scope = FunctionTemplate.FunctionScope.SIMPLE, nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
+
+  public static class UserAgentFieldFunction implements DrillSimpleFunc {
+    @Param
+    VarCharHolder input;
+
+    @Param
+    VarCharHolder desiredField;
+
+    @Output
+    VarCharHolder out;
+
+    @Inject
+    DrillBuf outBuffer;
+
+    @Workspace
+    nl.basjes.parse.useragent.UserAgentAnalyzerDirect uaa;
+
+    public void setup() {
+      uaa = nl.basjes.parse.useragent.UserAgentAnalyzerDirect.newBuilder().dropTests().hideMatcherLoadStats().build();
+      uaa.getAllPossibleFieldNamesSorted();
+    }
+
+    public void eval() {
+      String userAgentString = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(input);
+      String requestedField = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(desiredField);
+
+      nl.basjes.parse.useragent.UserAgent agent = uaa.parse(userAgentString);
+      String field = agent.getValue(requestedField);
+
+      byte[] rowStringBytes = field.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+      outBuffer.reallocIfNeeded(rowStringBytes.length);
+      outBuffer.setBytes(0, rowStringBytes);
+
+      out.start = 0;
+      out.end = rowStringBytes.length;
+      out.buffer = outBuffer;
+    }
+  }
+}
diff --git a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java
new file mode 100644
index 0000000..efa6708
--- /dev/null
+++ b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java
@@ -0,0 +1,171 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.drill.exec.udfs;
+
+import org.apache.drill.categories.SqlFunctionTest;
+import org.apache.drill.categories.UnlikelyTest;
+import org.apache.drill.test.ClusterFixture;
+import org.apache.drill.test.ClusterFixtureBuilder;
+import org.apache.drill.test.ClusterTest;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.HashMap;
+import java.util.Map;
+
+@Category({UnlikelyTest.class, SqlFunctionTest.class})
+public class TestUserAgentFunctions extends ClusterTest {
+
+  @BeforeClass
+  public static void setup() throws Exception {
+    ClusterFixtureBuilder builder = ClusterFixture.builder(dirTestWatcher);
+    startCluster(builder);
+  }
+
+  @Test
+  public void testParseUserAgentString() throws Exception {
+    String query = "SELECT t1.ua.DeviceClass AS DeviceClass,\n" +
+      "t1.ua.DeviceName AS DeviceName,\n" +
+      "t1.ua.DeviceBrand AS DeviceBrand,\n" +
+      "t1.ua.DeviceCpuBits AS DeviceCpuBits,\n" +
+      "t1.ua.OperatingSystemClass AS OperatingSystemClass,\n" +
+      "t1.ua.OperatingSystemName AS OperatingSystemName,\n" +
+      "t1.ua.OperatingSystemVersion AS OperatingSystemVersion,\n" +
+      "t1.ua.OperatingSystemVersionMajor AS OperatingSystemVersionMajor,\n" +
+      "t1.ua.OperatingSystemNameVersion AS OperatingSystemNameVersion,\n" +
+      "t1.ua.OperatingSystemNameVersionMajor AS OperatingSystemNameVersionMajor,\n" +
+      "t1.ua.LayoutEngineClass AS LayoutEngineClass,\n" +
+      "t1.ua.LayoutEngineName AS LayoutEngineName,\n" +
+      "t1.ua.LayoutEngineVersion AS LayoutEngineVersion,\n" +
+      "t1.ua.LayoutEngineVersionMajor AS LayoutEngineVersionMajor,\n" +
+      "t1.ua.LayoutEngineNameVersion AS LayoutEngineNameVersion,\n" +
+      "t1.ua.LayoutEngineBuild AS LayoutEngineBuild,\n" +
+      "t1.ua.AgentClass AS AgentClass,\n" +
+      "t1.ua.AgentName AS AgentName,\n" +
+      "t1.ua.AgentVersion AS AgentVersion,\n" +
+      "t1.ua.AgentVersionMajor AS AgentVersionMajor,\n" +
+      "t1.ua.AgentNameVersionMajor AS AgentNameVersionMajor,\n" +
+      "t1.ua.AgentLanguage AS AgentLanguage,\n" +
+      "t1.ua.AgentLanguageCode AS AgentLanguageCode,\n" +
+      "t1.ua.AgentSecurity AS AgentSecurity\n" +
+      "FROM (SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11') AS ua FROM (values(1))) AS t1";
+
+    testBuilder()
+      .sqlQuery(query)
+      .unOrdered()
+      .baselineColumns("DeviceClass", "DeviceName", "DeviceBrand", "DeviceCpuBits", "OperatingSystemClass", "OperatingSystemName", "OperatingSystemVersion", "OperatingSystemVersionMajor", "OperatingSystemNameVersion", "OperatingSystemNameVersionMajor", "LayoutEngineClass", "LayoutEngineName", "LayoutEngineVersion", "LayoutEngineVersionMajor", "LayoutEngineNameVersion", "LayoutEngineBuild", "AgentClass", "AgentName", "AgentVersion", "AgentVersionMajor", "AgentNameVersionMajor", "AgentLang [...]
+      .baselineValues("Desktop", "Desktop", "Unknown", "32", "Desktop", "Windows NT", "XP", "XP", "Windows XP", "Windows XP", "Browser", "Gecko", "1.8.1.11", "1", "Gecko 1.8.1.11", "20071127", "Browser", "Firefox", "2.0.0.11", "2", "Firefox 2", "English (United States)", "en-us", "Strong security")
+      .go();
+  }
+
+  @Test
+  public void testGetHostName() throws Exception {
+    String query = "SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', 'AgentSecurity') AS agent FROM "
+      + "(values(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues("Strong security")
+      .go();
+  }
+
+  @Test
+  public void testEmptyFieldName() throws Exception {
+    String query = "SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', '') AS agent FROM " + "(values" +
+      "(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues("Unknown")
+      .go();
+  }
+
+  @Test
+  public void testNullUserAgent() throws Exception {
+    String query = "SELECT parse_user_agent(CAST(null as VARCHAR)) AS agent FROM (values(1))";
+    Map emptyMap = new HashMap();
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues(emptyMap)
+      .go();
+  }
+
+
+  @Test
+  public void testEmptyUAStringAndFieldName() throws Exception {
+    String query = "SELECT parse_user_agent('', '') AS agent FROM (values(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues("Unknown")
+      .go();
+  }
+
+  @Test
+  public void testNullUAStringAndEmptyFieldName() throws Exception {
+    String query = "SELECT parse_user_agent(CAST(null as VARCHAR), '') AS agent FROM (values(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues((String) null)
+      .go();
+  }
+
+  @Test
+  public void testNullUAStringAndNullFieldName() throws Exception {
+    String query = "SELECT parse_user_agent(CAST(null as VARCHAR), CAST(null as VARCHAR)) AS agent FROM (values(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues((String) null)
+      .go();
+  }
+
+  @Test
+  public void testNullUAStringAndFieldName() throws Exception {
+    String query = "SELECT parse_user_agent(CAST(null as VARCHAR), 'AgentSecurity') AS agent FROM (values(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues((String) null)
+      .go();
+  }
+
+  @Test
+  public void testEmptyUAString() throws Exception {
+    String query = "SELECT t1.ua.AgentName AS AgentName FROM (SELECT parse_user_agent('') AS ua FROM (values(1))) as t1";
+
+    // If the UA string is empty, all returned fields default to "Hacker"
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("AgentName")
+      .baselineValues("Hacker")
+      .go();
+  }
+}
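
All three UDF variants above delegate the actual parsing to the yauaa analyzer built in setup(). A small standalone sketch of that call sequence, mirroring what eval() does before the values are written into the Drill map vector (the sample user agent string is the one used in the tests above; the exact set of fields printed depends on the yauaa rule set):

    import nl.basjes.parse.useragent.UserAgent;
    import nl.basjes.parse.useragent.UserAgentAnalyzerDirect;

    public class YauaaSketch {
      public static void main(String[] args) {
        // Mirrors setup() in the UDFs above.
        UserAgentAnalyzerDirect uaa = UserAgentAnalyzerDirect.newBuilder()
            .dropTests()
            .hideMatcherLoadStats()
            .build();

        // Mirrors eval(): parse once, then read each available field.
        UserAgent agent = uaa.parse(
            "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) "
                + "Gecko/20071127 Firefox/2.0.0.11");

        for (String fieldName : agent.getAvailableFieldNamesSorted()) {
          System.out.println(fieldName + " = " + agent.getValue(fieldName));
        }
      }
    }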


[drill] 02/04: DRILL-7362: COUNT(*) on JSON with outer list results in JsonParse error

Posted by gp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 797524dd2df4a18ef17752aff501a9e099f50a93
Author: ozinoviev <oz...@solit-clouds.ru>
AuthorDate: Thu Aug 29 15:14:17 2019 +0300

    DRILL-7362: COUNT(*) on JSON with outer list results in JsonParse error
    
    closes #1849
---
 .../store/easy/json/reader/BaseJsonReader.java     | 167 +++++++++++++++++++++
 .../store/easy/json/reader/CountingJsonReader.java |  47 ++----
 .../drill/exec/vector/complex/fn/JsonReader.java   | 126 ++--------------
 .../exec/store/json/TestJsonRecordReader.java      |  13 ++
 4 files changed, 203 insertions(+), 150 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonReader.java
new file mode 100644
index 0000000..983aa9f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/BaseJsonReader.java
@@ -0,0 +1,167 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.easy.json.reader;
+
+import com.fasterxml.jackson.core.JsonToken;
+import io.netty.buffer.DrillBuf;
+import org.apache.drill.common.exceptions.UserException;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/**
+ * Basic reader implementation for json documents.
+ */
+public abstract class BaseJsonReader extends BaseJsonProcessor {
+
+  private static final Logger logger = LoggerFactory.getLogger(BaseJsonReader.class);
+
+  /**
+   * Describes whether or not this reader can unwrap a single root array record
+   * and treat it like a set of distinct records.
+   */
+  private final boolean skipOuterList;
+
+  /**
+   * Whether the reader is currently in a situation where we are unwrapping an
+   * outer list.
+   */
+  private boolean inOuterList;
+
+  public BaseJsonReader(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar, boolean skipOuterList) {
+    super(workBuf, enableNanInf, enableEscapeAnyChar);
+    this.skipOuterList = skipOuterList;
+  }
+
+  @Override
+  public ReadState write(ComplexWriter writer) throws IOException {
+
+    try {
+      JsonToken t = lastSeenJsonToken;
+      if (t == null || t == JsonToken.END_OBJECT) {
+        t = parser.nextToken();
+      }
+      while (!parser.hasCurrentToken() && !parser.isClosed()) {
+        t = parser.nextToken();
+      }
+      lastSeenJsonToken = null;
+
+      if (parser.isClosed()) {
+        return ReadState.END_OF_STREAM;
+      }
+
+      ReadState readState = writeToVector(writer, t);
+
+      switch (readState) {
+        case END_OF_STREAM:
+        case WRITE_SUCCEED:
+          return readState;
+        default:
+          throw getExceptionWithContext(UserException.dataReadError(), null).message(
+            "Failure while reading JSON. (Got an invalid read state %s )", readState.toString())
+            .build(logger);
+      }
+    } catch (com.fasterxml.jackson.core.JsonParseException ex) {
+      if (ignoreJSONParseError()) {
+        if (processJSONException() == JsonExceptionProcessingState.END_OF_STREAM) {
+          return ReadState.JSON_RECORD_PARSE_EOF_ERROR;
+        } else {
+          return ReadState.JSON_RECORD_PARSE_ERROR;
+        }
+      } else {
+        throw ex;
+      }
+    }
+  }
+
+
+  private ReadState writeToVector(ComplexWriter writer, JsonToken t)
+    throws IOException {
+
+    switch (t) {
+      case START_OBJECT:
+        writeDocument(writer, t);
+        break;
+      case START_ARRAY:
+        if (inOuterList) {
+          throw createDocumentTopLevelException();
+        }
+
+        if (skipOuterList) {
+          t = parser.nextToken();
+          if (t == JsonToken.START_OBJECT) {
+            inOuterList = true;
+            writeDocument(writer, t);
+          } else {
+            throw createDocumentTopLevelException();
+          }
+
+        } else {
+          writeDocument(writer, t);
+        }
+        break;
+      case END_ARRAY:
+
+        if (inOuterList) {
+          confirmLast();
+          return ReadState.END_OF_STREAM;
+        } else {
+          throw getExceptionWithContext(UserException.dataReadError(), null).message(
+            "Failure while parsing JSON.  Ran across unexpected %s.", JsonToken.END_ARRAY).build(logger);
+        }
+
+      case NOT_AVAILABLE:
+        return ReadState.END_OF_STREAM;
+      default:
+        throw getExceptionWithContext(UserException.dataReadError(), null)
+          .message(
+            "Failure while parsing JSON.  Found token of [%s].  Drill currently only supports parsing "
+              + "json strings that contain either lists or maps.  The root object cannot be a scalar.",
+            t).build(logger);
+    }
+
+    return ReadState.WRITE_SUCCEED;
+  }
+
+  /**
+   * Writes the contents of the json node starting with the specified token into a complex vector.
+   * Token can take the following values:
+   * - START_ARRAY - the top level of json document is an array and skipping of the outer list is disabled
+   * - START_OBJECT - the top level of json document is a set of white space delimited maps
+   *                  or skipping of the outer list is enabled
+   */
+  protected abstract void writeDocument(ComplexWriter writer, JsonToken t) throws IOException;
+
+  protected UserException createDocumentTopLevelException() {
+    String message = "The top level of your document must either be a single array of maps or a set "
+      + "of white space delimited maps.";
+    return getExceptionWithContext(UserException.dataReadError(), message).build(logger);
+  }
+
+  private void confirmLast() throws IOException {
+    parser.nextToken();
+    if (!parser.isClosed()) {
+      String message = "Drill attempted to unwrap a toplevel list in your document. "
+        + "However, it appears that there is trailing content after this top level list.  Drill only "
+        + "supports querying a set of distinct maps or a single json array with multiple inner maps.";
+      throw getExceptionWithContext(UserException.dataReadError(), message).build(logger);
+    }
+  }
+}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
index 73b93f4..c9bcb0d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/json/reader/CountingJsonReader.java
@@ -23,50 +23,29 @@ import com.fasterxml.jackson.core.JsonToken;
 
 import io.netty.buffer.DrillBuf;
 
-import org.apache.drill.exec.vector.complex.writer.BaseWriter;
+import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
 
-public class CountingJsonReader extends BaseJsonProcessor {
+public class CountingJsonReader extends BaseJsonReader {
 
   public CountingJsonReader(DrillBuf workBuf, boolean enableNanInf, boolean enableEscapeAnyChar) {
-    super(workBuf, enableNanInf, enableEscapeAnyChar);
+    super(workBuf, enableNanInf, enableEscapeAnyChar, true);
   }
 
   @Override
-  public ReadState write(BaseWriter.ComplexWriter writer) throws IOException {
-    try {
-      JsonToken token = lastSeenJsonToken;
-      if (token == null || token == JsonToken.END_OBJECT){
-        token = parser.nextToken();
-      }
-      lastSeenJsonToken = null;
-      if (token == JsonToken.FIELD_NAME) {
-        currentFieldName = parser.getText();
-      }
-      if (!parser.hasCurrentToken()) {
-        return ReadState.END_OF_STREAM;
-      } else if (token != JsonToken.START_OBJECT) {
-        throw new com.fasterxml.jackson.core.JsonParseException(
-            parser, String.format("Cannot read from the middle of a record. Current token was %s ", token));
-      }
-      writer.rootAsMap().bit("count").writeBit(1);
-      parser.skipChildren();
-    } catch (com.fasterxml.jackson.core.JsonParseException ex) {
-      if (ignoreJSONParseError()) {
-        if (processJSONException() == JsonExceptionProcessingState.END_OF_STREAM){
-          return ReadState.JSON_RECORD_PARSE_EOF_ERROR;
-        }
-        else{
-          return ReadState.JSON_RECORD_PARSE_ERROR;
-        }
-      } else {
-        throw ex;
-      }
+  protected void writeDocument(ComplexWriter writer, JsonToken t) throws IOException {
+    switch (t) {
+      case START_OBJECT:
+      case START_ARRAY:
+        writer.rootAsMap().bit("count").writeBit(1);
+        parser.skipChildren();
+        break;
+      default:
+        throw createDocumentTopLevelException();
     }
-    return ReadState.WRITE_SUCCEED;
   }
 
   @Override
-  public void ensureAtLeastOneField(BaseWriter.ComplexWriter writer) {
+  public void ensureAtLeastOneField(ComplexWriter writer) {
 
   }
 }
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
index 3426243..ec838d0 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReader.java
@@ -26,7 +26,7 @@ import java.util.List;
 import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.physical.base.GroupScan;
-import org.apache.drill.exec.store.easy.json.reader.BaseJsonProcessor;
+import org.apache.drill.exec.store.easy.json.reader.BaseJsonReader;
 import org.apache.drill.exec.vector.complex.fn.VectorOutput.ListVectorOutput;
 import org.apache.drill.exec.vector.complex.fn.VectorOutput.MapVectorOutput;
 import org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter;
@@ -40,7 +40,7 @@ import org.apache.drill.shaded.guava.com.google.common.base.Charsets;
 import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
 import org.apache.drill.shaded.guava.com.google.common.collect.Lists;
 
-public class JsonReader extends BaseJsonProcessor {
+public class JsonReader extends BaseJsonReader {
   private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory
       .getLogger(JsonReader.class);
   public final static int MAX_RECORD_SIZE = 128 * 1024;
@@ -59,25 +59,12 @@ public class JsonReader extends BaseJsonProcessor {
    */
   private final List<ListWriter> emptyArrayWriters = Lists.newArrayList();
 
-  /**
-   * Describes whether or not this reader can unwrap a single root array record
-   * and treat it like a set of distinct records.
-   */
-  private final boolean skipOuterList;
-
-  /**
-   * Whether the reader is currently in a situation where we are unwrapping an
-   * outer list.
-   */
-  private boolean inOuterList;
-
   private FieldSelection selection;
 
   private JsonReader(Builder builder) {
-    super(builder.managedBuf, builder.enableNanInf, builder.enableEscapeAnyChar);
+    super(builder.managedBuf, builder.enableNanInf, builder.enableEscapeAnyChar, builder.skipOuterList);
     selection = FieldSelection.getFieldSelection(builder.columns);
     workingBuffer = builder.workingBuffer;
-    skipOuterList = builder.skipOuterList;
     allTextMode = builder.allTextMode;
     columns = builder.columns;
     mapOutput = builder.mapOutput;
@@ -184,110 +171,17 @@ public class JsonReader extends BaseJsonProcessor {
   }
 
   @Override
-  public ReadState write(ComplexWriter writer) throws IOException {
-
-    ReadState readState = null;
-    try {
-      JsonToken t = lastSeenJsonToken;
-      if (t == null || t == JsonToken.END_OBJECT) {
-        t = parser.nextToken();
-      }
-      while (!parser.hasCurrentToken() && !parser.isClosed()) {
-        t = parser.nextToken();
-      }
-      lastSeenJsonToken = null;
-
-      if (parser.isClosed()) {
-        return ReadState.END_OF_STREAM;
-      }
-
-      readState = writeToVector(writer, t);
-
-      switch (readState) {
-      case END_OF_STREAM:
+  protected void writeDocument(ComplexWriter writer, JsonToken t) throws IOException {
+    switch (t) {
+      case START_OBJECT:
+        writeDataSwitch(writer.rootAsMap());
         break;
-      case WRITE_SUCCEED:
+      case START_ARRAY:
+        writeDataSwitch(writer.rootAsList());
         break;
       default:
-        throw getExceptionWithContext(UserException.dataReadError(), null).message(
-            "Failure while reading JSON. (Got an invalid read state %s )", readState.toString())
-            .build(logger);
-      }
-    } catch (com.fasterxml.jackson.core.JsonParseException ex) {
-      if (ignoreJSONParseError()) {
-        if (processJSONException() == JsonExceptionProcessingState.END_OF_STREAM) {
-          return ReadState.JSON_RECORD_PARSE_EOF_ERROR;
-        } else {
-          return ReadState.JSON_RECORD_PARSE_ERROR;
-        }
-      } else {
-        throw ex;
-      }
-    }
-    return readState;
-  }
-
-  private void confirmLast() throws IOException {
-    parser.nextToken();
-    if (!parser.isClosed()) {
-      String message = "Drill attempted to unwrap a toplevel list in your document. "
-          + "However, it appears that there is trailing content after this top level list.  Drill only "
-          + "supports querying a set of distinct maps or a single json array with multiple inner maps.";
-      throw getExceptionWithContext(UserException.dataReadError(), message).build(logger);
-    }
-  }
-
-  private ReadState writeToVector(ComplexWriter writer, JsonToken t)
-      throws IOException {
-
-    switch (t) {
-    case START_OBJECT:
-      writeDataSwitch(writer.rootAsMap());
-      break;
-    case START_ARRAY:
-      if (inOuterList) {
-        String message = "The top level of your document must either be a single array of maps or a set "
-            + "of white space delimited maps.";
-        throw getExceptionWithContext(UserException.dataReadError(), message).build(logger);
-      }
-
-      if (skipOuterList) {
-        t = parser.nextToken();
-        if (t == JsonToken.START_OBJECT) {
-          inOuterList = true;
-          writeDataSwitch(writer.rootAsMap());
-        } else {
-          String message = "The top level of your document must either be a single array of maps or a set "
-              + "of white space delimited maps.";
-          throw getExceptionWithContext(UserException.dataReadError(), message).build(logger);
-        }
-
-      } else {
-        writeDataSwitch(writer.rootAsList());
-      }
-      break;
-    case END_ARRAY:
-
-      if (inOuterList) {
-        confirmLast();
-        return ReadState.END_OF_STREAM;
-      } else {
-        throw getExceptionWithContext(UserException.dataReadError(), null).message(
-            "Failure while parsing JSON.  Ran across unexpected %s.", JsonToken.END_ARRAY).build(logger);
-      }
-
-    case NOT_AVAILABLE:
-      return ReadState.END_OF_STREAM;
-    default:
-      throw getExceptionWithContext(UserException.dataReadError(), null)
-          .message(
-              "Failure while parsing JSON.  Found token of [%s].  Drill currently only supports parsing "
-                  + "json strings that contain either lists or maps.  The root object cannot be a scalar.",
-              t).build(logger);
+        throw createDocumentTopLevelException();
     }
-
-    return ReadState.WRITE_SUCCEED;
-
   }
 
   private void writeDataSwitch(MapWriter w) throws IOException {
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
index 7a601eb..719c3a3 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/json/TestJsonRecordReader.java
@@ -261,4 +261,17 @@ public class TestJsonRecordReader extends BaseTestQuery {
     }
     throw new Exception("testNotCountingQueryNotSkippingInvalidJSONRecords");
   }
+
+  @Test
+  @Category(UnlikelyTest.class)
+  // See DRILL-7362
+  /* Test for CountingJSONReader */
+  public void testContainingArrayCount() throws Exception {
+    testBuilder()
+      .sqlQuery("select count(*) as cnt from cp.`store/json/listdoc.json`")
+      .unOrdered()
+      .baselineColumns("cnt")
+      .baselineValues(2L)
+      .go();
+  }
 }
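
The fix moves the outer-list handling that JsonReader already had up into the shared BaseJsonReader, so CountingJsonReader now unwraps a top-level array instead of throwing a JsonParseException on the START_ARRAY token. A standalone Jackson sketch of the token sequence involved (the sample document is illustrative; the actual contents of store/json/listdoc.json are not shown in this commit):

    import com.fasterxml.jackson.core.JsonFactory;
    import com.fasterxml.jackson.core.JsonParser;
    import com.fasterxml.jackson.core.JsonToken;

    public class OuterListTokensSketch {
      public static void main(String[] args) throws Exception {
        // A document whose top level is a single array wrapping two maps,
        // the shape DRILL-7362 is about.
        String json = "[ {\"a\": 1}, {\"a\": 2} ]";

        JsonParser parser = new JsonFactory().createParser(json);
        JsonToken t = parser.nextToken();          // START_ARRAY
        if (t == JsonToken.START_ARRAY) {
          // skipOuterList: step inside the array and treat each object
          // as one record, as BaseJsonReader.writeToVector() now does.
          while (parser.nextToken() == JsonToken.START_OBJECT) {
            parser.skipChildren();                 // one record counted here
            System.out.println("counted one record");
          }
          // The closing END_ARRAY is where confirmLast() checks that no
          // trailing content follows the top-level list.
        }
      }
    }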


[drill] 04/04: DRILL-7369: Schema for MaprDB tables is not used for the case when several fields are queried

Posted by gp...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

gparai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 65df1fdeffbec41a8e538bdcbdf42356e5e94015
Author: Volodymyr Vysotskyi <vv...@gmail.com>
AuthorDate: Fri Sep 6 14:57:41 2019 +0300

    DRILL-7369: Schema for MaprDB tables is not used for the case when several fields are queried
    
    closes #1852
---
 .../org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java    | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java
index 5895ba5..0485e39 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/vector/complex/fn/JsonReaderUtils.java
@@ -74,12 +74,14 @@ public class JsonReaderUtils {
       TypeProtos.MajorType majorType = allTextMode
           ? Types.optional(TypeProtos.MinorType.VARCHAR)
           : Types.optional(TypeProtos.MinorType.INT);
+      ColumnMetadata metadata = null;
       if (columnMetadata != null) {
-        ColumnMetadata metadata = columnMetadata.metadata(fieldPath.getNameSegment().getPath());
+        metadata = columnMetadata.metadata(fieldPath.getNameSegment().getPath());
         majorType = metadata != null ? metadata.majorType() : majorType;
       }
       types.add(majorType);
-      if (fieldWriter.isEmptyMap()) {
+      // for the case if metadata is specified, ensures that required fields are created
+      if (fieldWriter.isEmptyMap() || metadata != null) {
         emptyStatus.set(fieldIndex, true);
       }
       if (fieldIndex == 0 && !allTextMode && schema == null) {