You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2022/05/19 00:57:15 UTC

[drill] branch master updated: DRILL-8225: Update LogParser and Yauaa to support User-Agent Client Hints (#2549)

This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new aed78f33c8 DRILL-8225: Update LogParser and Yauaa to support User-Agent Client Hints (#2549)
aed78f33c8 is described below

commit aed78f33c8d93c2850897ae1bd5c228d84c30de3
Author: Niels Basjes <ni...@basjes.nl>
AuthorDate: Thu May 19 02:57:08 2022 +0200

    DRILL-8225: Update LogParser and Yauaa to support User-Agent Client Hints (#2549)
    
    * DRILL-8225: Update LogParser and Yauaa to support User-Agent Client Hints
    
    * DRILL-8225: Replace Caffeine caching with Java 8 compliant solution
    
    * DRILL-8225: Fix dependency issues
    
    * DRILL-8225: Use new API for JDK8 caching
---
 contrib/format-excel/pom.xml                       |   1 -
 contrib/format-httpd/pom.xml                       |  20 ++
 contrib/udfs/README.md                             | 108 ++++++-
 contrib/udfs/pom.xml                               |  15 +
 .../drill/exec/udfs/UserAgentAnalyzerProvider.java |  38 +++
 .../apache/drill/exec/udfs/UserAgentFunctions.java |  82 +-----
 .../drill/exec/udfs/TestUserAgentFunctions.java    | 324 ++++++++++++++++++---
 pom.xml                                            |   5 +-
 8 files changed, 488 insertions(+), 105 deletions(-)

diff --git a/contrib/format-excel/pom.xml b/contrib/format-excel/pom.xml
index 619aac4323..ed5a02494c 100644
--- a/contrib/format-excel/pom.xml
+++ b/contrib/format-excel/pom.xml
@@ -32,7 +32,6 @@
 
   <properties>
     <poi.version>5.2.1</poi.version>
-    <log4j.version>2.17.2</log4j.version>
   </properties>
   <dependencies>
     <dependency>
diff --git a/contrib/format-httpd/pom.xml b/contrib/format-httpd/pom.xml
index f8c71e736e..4731b6b69c 100644
--- a/contrib/format-httpd/pom.xml
+++ b/contrib/format-httpd/pom.xml
@@ -60,9 +60,29 @@
           <groupId>nl.basjes.parse.httpdlog</groupId>
           <artifactId>httpdlog-parser</artifactId>
         </exclusion>
+        <exclusion>
+          <groupId>com.github.ben-manes.caffeine</groupId>
+          <artifactId>caffeine</artifactId>
+        </exclusion>
       </exclusions>
     </dependency>
 
+    <!-- The Log4j2 API that Yauaa uses for its logging calls -->
+    <!-- (these calls are sent to SLF4J by the log4j-to-slf4j dependency below) -->
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-api</artifactId>
+      <version>${log4j.version}</version>
+    </dependency>
+
+    <!-- The default logging implementation for Yauaa -->
+    <!-- Send all Log4j2 calls to SLF4J -->
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-to-slf4j</artifactId>
+      <version>${log4j.version}</version>
+    </dependency>
+
     <!-- Test dependencies -->
     <dependency>
       <groupId>org.apache.drill.exec</groupId>
diff --git a/contrib/udfs/README.md b/contrib/udfs/README.md
index ae65e1d11b..6f6799d5d3 100644
--- a/contrib/udfs/README.md
+++ b/contrib/udfs/README.md
@@ -224,8 +224,24 @@ SELECT time_bucket(time_stamp, 30000) AS five_min, avg(cpu)
 Drill UDF for parsing User Agent Strings.
 This function is based on Niels Basjes Java library for parsing user agent strings which is available here: <https://github.com/nielsbasjes/yauaa>.
 
-### Usage
+### Basic usage
 The function `parse_user_agent()` takes a user agent string as an argument and returns a map of the available fields. Note that not every field will be present in every user agent string. 
+
+The basic function signatures look like this:
+
+    parse_user_agent ( <useragent> )
+    parse_user_agent ( <useragent> , <desired fieldname> )
+
+To support the analysis of the User-Agent Client Hints, the function now also supports:
+
+    parse_user_agent ( <useragent> , [<header name>,<value>]+ )
+
+or the variant in which every input is given as a header name/value pair; this variant requires the presence of a `User-Agent` header:
+
+    parse_user_agent ( [<header name>,<value>]+ )
+
+### Analyzing the User-Agent
+
 ```
 SELECT parse_user_agent( columns[0] ) as ua 
 FROM dfs.`/tmp/data/drill-httpd/ua.csv`;
@@ -273,6 +289,96 @@ SELECT parse_user_agent( `user_agent`, 'AgentName` ) as AgentName ...
 ```
 which will just return the requested field. If the user agent string is empty, all fields will have the value of `Hacker`.  
 
+### Analyzing the User-Agent Client Hints
+
+Assume an Apache Httpd webserver with the following LogFormat config:
+
+    LogFormat "%a %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%{Sec-CH-UA}i\" \"%{Sec-CH-UA-Arch}i\" \"%{Sec-CH-UA-Bitness}i\" \"%{Sec-CH-UA-Full-Version}i\" \"%{Sec-CH-UA-Full-Version-List}i\" \"%{Sec-CH-UA-Mobile}i\" \"%{Sec-CH-UA-Model}i\" \"%{Sec-CH-UA-Platform}i\" \"%{Sec-CH-UA-Platform-Version}i\" \"%{Sec-CH-UA-WoW64}i\" %V" combinedhintsvhost
+
+Behind this Apache Httpd webserver is a website that returns the header
+
+    Accept-CH: Sec-CH-UA, Sec-CH-UA-Arch, Sec-CH-UA-Bitness, Sec-CH-UA-Full-Version, Sec-CH-UA-Full-Version-List, Sec-CH-UA-Mobile, Sec-CH-UA-Model, Sec-CH-UA-Platform, Sec-CH-UA-Platform-Version, Sec-CH-UA-WoW64
+
+With all of this in place, these are two of the lines found in the access log of this Apache Httpd webserver:
+
+    45.138.228.54 - - [02/May/2022:12:25:10 +0200] "GET / HTTP/1.1" 200 16141 "-" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36" "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"100\", \"Google Chrome\";v=\"100\"" "\"x86\"" "\"64\"" "\"100.0.4896.127\"" "\" Not A;Brand\";v=\"99.0.0.0\", \"Chromium\";v=\"100.0.4896.127\", \"Google Chrome\";v=\"100.0.4896.127\"" "?0" "\"\"" "\"Linux\"" "\"5.13.0\"" "?0" try.yauaa.basjes.nl
+    45.138.228.54 - - [02/May/2022:12:25:34 +0200] "GET / HTTP/1.1" 200 15376 "-" "Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Mobile Safari/537.36" "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"101\", \"Google Chrome\";v=\"101\"" "\"\"" "-" "\"101.0.4951.41\"" "\" Not A;Brand\";v=\"99.0.0.0\", \"Chromium\";v=\"101.0.4951.41\", \"Google Chrome\";v=\"101.0.4951.41\"" "?1" "\"Nokia 7.2\"" "\"Android\"" "\"11.0.0\"" "?0" try.yauaa.basjes.nl
+
+For this example, the name of this file is `access.hints`.
+
+When querying this data using ONLY the User-Agent as the input:
+
+    SELECT  uadata.ua.DeviceClass                AS DeviceClass,
+            uadata.ua.AgentNameVersionMajor      AS AgentNameVersionMajor,
+            uadata.ua.OperatingSystemNameVersion AS OperatingSystemNameVersion
+    FROM (
+        SELECT
+                parse_user_agent(`request_user-agent`) AS ua
+        FROM    table(
+                    dfs.`/tmp/access.hints` (
+                        type => 'httpd',
+                        logFormat => '%a %l %u %t "%r" %>s %b "%{Referer}i" "%{User-Agent}i" "%{Sec-CH-UA}i" "%{Sec-CH-UA-Arch}i" "%{Sec-CH-UA-Bitness}i" "%{Sec-CH-UA-Full-Version}i" "%{Sec-CH-UA-Full-Version-List}i" "%{Sec-CH-UA-Mobile}i" "%{Sec-CH-UA-Model}i" "%{Sec-CH-UA-Platform}i" "%{Sec-CH-UA-Platform-Version}i" "%{Sec-CH-UA-WoW64}i" %V',
+                        flattenWildcards => true
+                    )
+                )
+    ) AS uadata;
+
+it produces:
+
+    +-------------+-----------------------+----------------------------+
+    | DeviceClass | AgentNameVersionMajor | OperatingSystemNameVersion |
+    +-------------+-----------------------+----------------------------+
+    | Desktop     | Chrome 100            | Linux ??                   |
+    | Phone       | Chrome 101            | Android ??                 |
+    +-------------+-----------------------+----------------------------+
+    2 rows selected (0.183 seconds)
+
+The first example here does not have the exact version of the operating system as part of the User-Agent and this results in `Linux ??`.
+
+The second example shows `Android 10`, but it was recognized as being a `reduced` variant of the `User-Agent`. This means that the version `10` is a standard value that does not reflect the real version, so here you see `Android ??`. See <https://www.chromium.org/updates/ua-reduction>.
+
+Now let's repeat the same and use the recorded `User-Agent Client Hint` header values:
+
+    SELECT  uadata.ua.DeviceClass                AS DeviceClass,
+            uadata.ua.AgentNameVersionMajor      AS AgentNameVersionMajor,
+            uadata.ua.OperatingSystemNameVersion AS OperatingSystemNameVersion
+    FROM (
+        SELECT
+                parse_user_agent(
+                    'User-Agent' ,                  `request_user-agent`,
+                    'sec-ch-ua',                    `request_header_sec-ch-ua`,
+                    'sec-ch-ua-arch',               `request_header_sec-ch-ua-arch`,
+                    'sec-ch-ua-bitness',            `request_header_sec-ch-ua-bitness`,
+                    'sec-ch-ua-full-version',       `request_header_sec-ch-ua-full-version`,
+                    'sec-ch-ua-full-version-list',  `request_header_sec-ch-ua-full-version-list`,
+                    'sec-ch-ua-mobile',             `request_header_sec-ch-ua-mobile`,
+                    'sec-ch-ua-model',              `request_header_sec-ch-ua-model`,
+                    'sec-ch-ua-platform',           `request_header_sec-ch-ua-platform`,
+                    'sec-ch-ua-platform-version',   `request_header_sec-ch-ua-platform-version`,
+                    'sec-ch-ua-wow64',              `request_header_sec-ch-ua-wow64`
+                ) AS ua
+        FROM    table(
+                    dfs.`/tmp/access.hints` (
+                        type => 'httpd',
+                        logFormat => '%a %l %u %t "%r" %>s %b "%{Referer}i" "%{User-Agent}i" "%{Sec-CH-UA}i" "%{Sec-CH-UA-Arch}i" "%{Sec-CH-UA-Bitness}i" "%{Sec-CH-UA-Full-Version}i" "%{Sec-CH-UA-Full-Version-List}i" "%{Sec-CH-UA-Mobile}i" "%{Sec-CH-UA-Model}i" "%{Sec-CH-UA-Platform}i" "%{Sec-CH-UA-Platform-Version}i" "%{Sec-CH-UA-WoW64}i" %V',
+                        flattenWildcards => true
+                    )
+                )
+    ) AS uadata;
+
+
+which produces
+
+    +-------------+-----------------------+----------------------------+
+    | DeviceClass | AgentNameVersionMajor | OperatingSystemNameVersion |
+    +-------------+-----------------------+----------------------------+
+    | Desktop     | Chrome 100            | Linux 5.13.0               |
+    | Phone       | Chrome 101            | Android 11.0.0             |
+    +-------------+-----------------------+----------------------------+
+    2 rows selected (0.275 seconds)
+
+The improvement after adding the Client Hints is evident.
+
 ## Map Schema Function
 This function allows you to drill down into the schema of maps.  The REST API and JDBC interfaces will only return `MAP`, `LIST` for the MAP, however, it is not possible to get 
 the schema of the inner map. The function `getMapSchema(<MAP>)` will return a `MAP` of the fields and datatypes.
diff --git a/contrib/udfs/pom.xml b/contrib/udfs/pom.xml
index 5d50472170..a33085bf49 100644
--- a/contrib/udfs/pom.xml
+++ b/contrib/udfs/pom.xml
@@ -68,6 +68,12 @@
       <groupId>nl.basjes.parse.useragent</groupId>
       <artifactId>yauaa</artifactId>
       <version>${yauaa.version}</version>
+      <exclusions>
+        <exclusion>
+          <groupId>com.github.ben-manes.caffeine</groupId>
+          <artifactId>caffeine</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
 
     <!-- Test dependencies -->
@@ -86,6 +92,15 @@
       <version>${project.version}</version>
       <scope>test</scope>
     </dependency>
+
+    <!-- The default logging implementation for Yauaa -->
+    <!-- Send all Log4j2 calls to SLF4J -->
+    <dependency>
+      <groupId>org.apache.logging.log4j</groupId>
+      <artifactId>log4j-to-slf4j</artifactId>
+      <version>${log4j.version}</version>
+    </dependency>
+
   </dependencies>
 
   <build>
diff --git a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentAnalyzerProvider.java b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentAnalyzerProvider.java
index 5094527b16..e3cdaa4e1c 100644
--- a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentAnalyzerProvider.java
+++ b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentAnalyzerProvider.java
@@ -18,7 +18,16 @@
 
 package org.apache.drill.exec.udfs;
 
+import nl.basjes.parse.useragent.AnalyzerUtilities.ParsedArguments;
 import nl.basjes.parse.useragent.UserAgentAnalyzer;
+import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static nl.basjes.parse.useragent.AnalyzerUtilities.parseArguments;
+import static nl.basjes.parse.useragent.UserAgent.USERAGENT_HEADER;
+import static org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder;
 
 public class UserAgentAnalyzerProvider {
 
@@ -26,11 +35,40 @@ public class UserAgentAnalyzerProvider {
     return UserAgentAnalyzerHolder.INSTANCE;
   }
 
+  public static List<String> getAllFields() {
+    return UserAgentAnalyzerHolder.INSTANCE.getAllPossibleFieldNamesSorted();
+  }
+
+  private static List<String> allHeaders = null;
+
+  public static synchronized List<String> getAllHeaders() {
+    if (allHeaders == null) {
+      allHeaders = new ArrayList<>();
+      allHeaders.add(USERAGENT_HEADER);
+      allHeaders.addAll(getInstance().supportedClientHintHeaders());
+    }
+    return allHeaders;
+  }
+
   private static class UserAgentAnalyzerHolder {
     private static final UserAgentAnalyzer INSTANCE = UserAgentAnalyzer.newBuilder()
             .dropTests()
             .hideMatcherLoadStats()
+            // Caffeine is a Java 11+ library.
+            .useJava8CompatibleCaching()
             .immediateInitialization()
             .build();
   }
+
+  public static ParsedArguments parseArgumentArray(NullableVarCharHolder[] input) {
+    List<String> inputList = new ArrayList<>();
+    for (NullableVarCharHolder holder : input) {
+      if (holder == null || holder.buffer == null) {
+        inputList.add(null);
+      } else {
+        inputList.add(getStringFromVarCharHolder(holder));
+      }
+    }
+    return parseArguments(inputList, getAllFields(), getAllHeaders());
+  }
 }
diff --git a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java
index 40f97a1a1d..39b7bccee3 100644
--- a/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java
+++ b/contrib/udfs/src/main/java/org/apache/drill/exec/udfs/UserAgentFunctions.java
@@ -24,23 +24,22 @@ import org.apache.drill.exec.expr.annotations.FunctionTemplate;
 import org.apache.drill.exec.expr.annotations.Output;
 import org.apache.drill.exec.expr.annotations.Param;
 import org.apache.drill.exec.expr.annotations.Workspace;
-import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
-import org.apache.drill.exec.expr.holders.VarCharHolder;
-import org.apache.drill.exec.vector.complex.writer.BaseWriter;
 
 import javax.inject.Inject;
 
 public class UserAgentFunctions {
 
-  @FunctionTemplate(name = "parse_user_agent",
+  @FunctionTemplate(
+    name = "parse_user_agent",
+    isVarArg = true,
     scope = FunctionTemplate.FunctionScope.SIMPLE
   )
   public static class UserAgentFunction implements DrillSimpleFunc {
     @Param
-    VarCharHolder input;
+    org.apache.drill.exec.expr.holders.NullableVarCharHolder[] input;
 
     @Output
-    BaseWriter.ComplexWriter outWriter;
+    org.apache.drill.exec.vector.complex.writer.BaseWriter.ComplexWriter outWriter;
 
     @Inject
     DrillBuf outBuffer;
@@ -53,72 +52,23 @@ public class UserAgentFunctions {
 
     public void setup() {
       uaa = org.apache.drill.exec.udfs.UserAgentAnalyzerProvider.getInstance();
-      allFields = uaa.getAllPossibleFieldNamesSorted();
+      allFields = org.apache.drill.exec.udfs.UserAgentAnalyzerProvider.getAllFields();
     }
 
     public void eval() {
       org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter queryMapWriter = outWriter.rootAsMap();
 
-      String userAgentString = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(input);
-
-      nl.basjes.parse.useragent.UserAgent agent = uaa.parse(userAgentString);
-
-      for (String fieldName: allFields) {
-
-        org.apache.drill.exec.expr.holders.VarCharHolder rowHolder = new org.apache.drill.exec.expr.holders.VarCharHolder();
-        String field = agent.getValue(fieldName);
-
-        byte[] rowStringBytes = field.getBytes();
-        outBuffer = outBuffer.reallocIfNeeded(rowStringBytes.length);
-        outBuffer.setBytes(0, rowStringBytes);
-
-        rowHolder.start = 0;
-        rowHolder.end = rowStringBytes.length;
-        rowHolder.buffer = outBuffer;
-
-        queryMapWriter.varChar(fieldName).write(rowHolder);
-      }
-    }
-  }
-
-  @FunctionTemplate(name = "parse_user_agent",
-    scope = FunctionTemplate.FunctionScope.SIMPLE
-  )
-  public static class NullableUserAgentFunction implements DrillSimpleFunc {
-    @Param
-    NullableVarCharHolder input;
-
-    @Output
-    BaseWriter.ComplexWriter outWriter;
-
-    @Inject
-    DrillBuf outBuffer;
+      nl.basjes.parse.useragent.AnalyzerUtilities.ParsedArguments parsedArguments =
+        org.apache.drill.exec.udfs.UserAgentAnalyzerProvider.parseArgumentArray(input);
 
-    @Workspace
-    nl.basjes.parse.useragent.UserAgentAnalyzer uaa;
+      nl.basjes.parse.useragent.UserAgent agent = uaa.parse(parsedArguments.getRequestHeaders());
 
-    @Workspace
-    java.util.List<String> allFields;
-
-    public void setup() {
-      uaa = org.apache.drill.exec.udfs.UserAgentAnalyzerProvider.getInstance();
-      allFields = uaa.getAllPossibleFieldNamesSorted();
-    }
-
-    public void eval() {
-      org.apache.drill.exec.vector.complex.writer.BaseWriter.MapWriter queryMapWriter = outWriter.rootAsMap();
-      if (input.isSet == 0) {
-        // Return empty map
-        queryMapWriter.start();
-        queryMapWriter.end();
-        return;
+      java.util.List<String> wantedFields = parsedArguments.getWantedFields();
+      if (wantedFields.isEmpty()) {
+        wantedFields = allFields;
       }
-      String userAgentString = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.getStringFromVarCharHolder(input);
-
-      nl.basjes.parse.useragent.UserAgent agent = uaa.parse(userAgentString);
-
-      for (String fieldName: allFields) {
 
+      for (String fieldName : wantedFields) {
         org.apache.drill.exec.expr.holders.VarCharHolder rowHolder = new org.apache.drill.exec.expr.holders.VarCharHolder();
         String field = agent.getValue(fieldName);
 
@@ -140,13 +90,13 @@ public class UserAgentFunctions {
 
   public static class UserAgentFieldFunction implements DrillSimpleFunc {
     @Param
-    VarCharHolder input;
+    org.apache.drill.exec.expr.holders.VarCharHolder input;
 
     @Param
-    VarCharHolder desiredField;
+    org.apache.drill.exec.expr.holders.VarCharHolder desiredField;
 
     @Output
-    VarCharHolder out;
+    org.apache.drill.exec.expr.holders.VarCharHolder out;
 
     @Inject
     DrillBuf outBuffer;
diff --git a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java
index 49b700156b..0ac40ed774 100644
--- a/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java
+++ b/contrib/udfs/src/test/java/org/apache/drill/exec/udfs/TestUserAgentFunctions.java
@@ -18,8 +18,11 @@
 
 package org.apache.drill.exec.udfs;
 
+import nl.basjes.parse.useragent.UserAgentAnalyzer;
 import org.apache.drill.categories.SqlFunctionTest;
 import org.apache.drill.categories.UnlikelyTest;
+import org.apache.drill.common.expression.ExpressionStringBuilder;
+import org.apache.drill.exec.util.Text;
 import org.apache.drill.test.ClusterFixture;
 import org.apache.drill.test.ClusterFixtureBuilder;
 import org.apache.drill.test.ClusterTest;
@@ -28,7 +31,12 @@ import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 import java.util.Collections;
+import java.util.LinkedHashMap;
 import java.util.Map;
+import java.util.TreeMap;
+
+import static org.apache.drill.test.TestBuilder.parsePath;
+import static org.junit.Assert.assertEquals;
 
 @Category({UnlikelyTest.class, SqlFunctionTest.class})
 public class TestUserAgentFunctions extends ClusterTest {
@@ -41,44 +49,77 @@ public class TestUserAgentFunctions extends ClusterTest {
 
   @Test
   public void testParseUserAgentString() throws Exception {
-    String query = "SELECT t1.ua.DeviceClass AS DeviceClass,\n" +
-      "t1.ua.DeviceName AS DeviceName,\n" +
-      "t1.ua.DeviceBrand AS DeviceBrand,\n" +
-      "t1.ua.DeviceCpuBits AS DeviceCpuBits,\n" +
-      "t1.ua.OperatingSystemClass AS OperatingSystemClass,\n" +
-      "t1.ua.OperatingSystemName AS OperatingSystemName,\n" +
-      "t1.ua.OperatingSystemVersion AS OperatingSystemVersion,\n" +
-      "t1.ua.OperatingSystemVersionMajor AS OperatingSystemVersionMajor,\n" +
-      "t1.ua.OperatingSystemNameVersion AS OperatingSystemNameVersion,\n" +
-      "t1.ua.OperatingSystemNameVersionMajor AS OperatingSystemNameVersionMajor,\n" +
-      "t1.ua.LayoutEngineClass AS LayoutEngineClass,\n" +
-      "t1.ua.LayoutEngineName AS LayoutEngineName,\n" +
-      "t1.ua.LayoutEngineVersion AS LayoutEngineVersion,\n" +
-      "t1.ua.LayoutEngineVersionMajor AS LayoutEngineVersionMajor,\n" +
-      "t1.ua.LayoutEngineNameVersion AS LayoutEngineNameVersion,\n" +
-      "t1.ua.LayoutEngineBuild AS LayoutEngineBuild,\n" +
-      "t1.ua.AgentClass AS AgentClass,\n" +
-      "t1.ua.AgentName AS AgentName,\n" +
-      "t1.ua.AgentVersion AS AgentVersion,\n" +
-      "t1.ua.AgentVersionMajor AS AgentVersionMajor,\n" +
-      "t1.ua.AgentNameVersionMajor AS AgentNameVersionMajor,\n" +
-      "t1.ua.AgentLanguage AS AgentLanguage,\n" +
-      "t1.ua.AgentLanguageCode AS AgentLanguageCode,\n" +
-      "t1.ua.AgentSecurity AS AgentSecurity\n" +
-      "FROM (SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11') AS ua FROM (values(1))) AS t1";
+    String query =
+      "SELECT t1.ua.DeviceClass                     AS DeviceClass," +
+      "       t1.ua.DeviceName                      AS DeviceName," +
+      "       t1.ua.DeviceBrand                     AS DeviceBrand," +
+      "       t1.ua.DeviceCpuBits                   AS DeviceCpuBits," +
+      "       t1.ua.OperatingSystemClass            AS OperatingSystemClass," +
+      "       t1.ua.OperatingSystemName             AS OperatingSystemName," +
+      "       t1.ua.OperatingSystemVersion          AS OperatingSystemVersion," +
+      "       t1.ua.OperatingSystemVersionMajor     AS OperatingSystemVersionMajor," +
+      "       t1.ua.OperatingSystemNameVersion      AS OperatingSystemNameVersion," +
+      "       t1.ua.OperatingSystemNameVersionMajor AS OperatingSystemNameVersionMajor," +
+      "       t1.ua.LayoutEngineClass               AS LayoutEngineClass," +
+      "       t1.ua.LayoutEngineName                AS LayoutEngineName," +
+      "       t1.ua.LayoutEngineVersion             AS LayoutEngineVersion," +
+      "       t1.ua.LayoutEngineVersionMajor        AS LayoutEngineVersionMajor," +
+      "       t1.ua.LayoutEngineNameVersion         AS LayoutEngineNameVersion," +
+      "       t1.ua.LayoutEngineBuild               AS LayoutEngineBuild," +
+      "       t1.ua.AgentClass                      AS AgentClass," +
+      "       t1.ua.AgentName                       AS AgentName," +
+      "       t1.ua.AgentVersion                    AS AgentVersion," +
+      "       t1.ua.AgentVersionMajor               AS AgentVersionMajor," +
+      "       t1.ua.AgentNameVersionMajor           AS AgentNameVersionMajor," +
+      "       t1.ua.AgentLanguage                   AS AgentLanguage," +
+      "       t1.ua.AgentLanguageCode               AS AgentLanguageCode," +
+      "       t1.ua.AgentSecurity                   AS AgentSecurity " +
+      "FROM (" +
+      "    SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11') AS ua" +
+      "    FROM (values(1))" +
+      ") AS t1";
 
     testBuilder()
       .sqlQuery(query)
       .unOrdered()
-      .baselineColumns("DeviceClass", "DeviceName", "DeviceBrand", "DeviceCpuBits", "OperatingSystemClass", "OperatingSystemName", "OperatingSystemVersion", "OperatingSystemVersionMajor", "OperatingSystemNameVersion", "OperatingSystemNameVersionMajor", "LayoutEngineClass", "LayoutEngineName", "LayoutEngineVersion", "LayoutEngineVersionMajor", "LayoutEngineNameVersion", "LayoutEngineBuild", "AgentClass", "AgentName", "AgentVersion", "AgentVersionMajor", "AgentNameVersionMajor", "AgentLang [...]
-      .baselineValues("Desktop", "Desktop", "Unknown", "32", "Desktop", "Windows NT", "XP", "XP", "Windows XP", "Windows XP", "Browser", "Gecko", "1.8.1.11", "1", "Gecko 1.8.1.11", "20071127", "Browser", "Firefox", "2.0.0.11", "2", "Firefox 2", "English (United States)", "en-us", "Strong security")
+      .baselineRecords(
+        Collections.singletonList(// Singleton list because we expect 1 record
+          expectations(
+            "DeviceClass",                     "Desktop",
+            "DeviceName",                      "Desktop",
+            "DeviceBrand",                     "Unknown",
+            "DeviceCpuBits",                   "32",
+            "OperatingSystemClass",            "Desktop",
+            "OperatingSystemName",             "Windows NT",
+            "OperatingSystemVersion",          "XP",
+            "OperatingSystemVersionMajor",     "XP",
+            "OperatingSystemNameVersion",      "Windows XP",
+            "OperatingSystemNameVersionMajor", "Windows XP",
+            "LayoutEngineClass",               "Browser",
+            "LayoutEngineName",                "Gecko",
+            "LayoutEngineVersion",             "1.8.1.11",
+            "LayoutEngineVersionMajor",        "1",
+            "LayoutEngineNameVersion",         "Gecko 1.8.1.11",
+            "LayoutEngineBuild",               "20071127",
+            "AgentClass",                      "Browser",
+            "AgentName",                       "Firefox",
+            "AgentVersion",                    "2.0.0.11",
+            "AgentVersionMajor",               "2",
+            "AgentNameVersionMajor",           "Firefox 2",
+            "AgentLanguage",                   "English (United States)",
+            "AgentLanguageCode",               "en-us",
+            "AgentSecurity",                   "Strong security"
+          )
+        )
+      )
       .go();
   }
 
   @Test
-  public void testGetHostName() throws Exception {
-    String query = "SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', 'AgentSecurity') AS agent FROM "
-      + "(values(1))";
+  public void testValidFieldName() throws Exception {
+    String query =
+      "SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', 'AgentSecurity') AS agent " +
+      "FROM (values(1))";
     testBuilder()
       .sqlQuery(query)
       .ordered()
@@ -89,8 +130,22 @@ public class TestUserAgentFunctions extends ClusterTest {
 
   @Test
   public void testEmptyFieldName() throws Exception {
-    String query = "SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', '') AS agent FROM " + "(values" +
-      "(1))";
+    String query =
+      "SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', '') AS agent " +
+      "FROM (values(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues("Unknown")
+      .go();
+  }
+
+  @Test
+  public void testBadFieldName() throws Exception {
+    String query =
+      "SELECT parse_user_agent('Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11', 'NoSuchField') AS agent " +
+      "FROM (values(1))";
     testBuilder()
       .sqlQuery(query)
       .ordered()
@@ -101,17 +156,24 @@ public class TestUserAgentFunctions extends ClusterTest {
 
   @Test
   public void testNullUserAgent() throws Exception {
+    // If a null value is provided then the UserAgentAnalyzer will classify this as a Hacker because all requests normally have a User-Agent.
+    UserAgentAnalyzer analyzer = UserAgentAnalyzer.newBuilder().showMinimalVersion().withoutCache().dropTests().immediateInitialization().build();
+    Map<String, String> expected = analyzer.parse((String)null).toMap(analyzer.getAllPossibleFieldNamesSorted());
+
+    Map<String, Text> expectedRecord = new TreeMap<>();
+    for (Map.Entry<String, String> entry : expected.entrySet()) {
+      expectedRecord.put(entry.getKey(), new Text(entry.getValue()));
+    }
+
     String query = "SELECT parse_user_agent(CAST(null as VARCHAR)) AS agent FROM (values(1))";
-    Map<?, ?> emptyMap = Collections.emptyMap();
     testBuilder()
       .sqlQuery(query)
       .ordered()
       .baselineColumns("agent")
-      .baselineValues(emptyMap)
+      .baselineValues(expectedRecord)
       .go();
   }
 
-
   @Test
   public void testEmptyUAStringAndFieldName() throws Exception {
     String query = "SELECT parse_user_agent('', '') AS agent FROM (values(1))";
@@ -134,6 +196,17 @@ public class TestUserAgentFunctions extends ClusterTest {
       .go();
   }
 
+  @Test
+  public void testNullUAStringAndBadFieldName() throws Exception {
+    String query = "SELECT parse_user_agent(CAST(null as VARCHAR), 'NoSuchField') AS agent FROM (values(1))";
+    testBuilder()
+      .sqlQuery(query)
+      .ordered()
+      .baselineColumns("agent")
+      .baselineValues((String) null)
+      .go();
+  }
+
   @Test
   public void testNullUAStringAndNullFieldName() throws Exception {
     String query = "SELECT parse_user_agent(CAST(null as VARCHAR), CAST(null as VARCHAR)) AS agent FROM (values(1))";
@@ -168,4 +241,185 @@ public class TestUserAgentFunctions extends ClusterTest {
       .baselineValues("Hacker")
       .go();
   }
+
+  @Test
+  public void testClientHints() throws Exception {
+    // The function is called with alternating (header name, header value) arguments: the User-Agent plus a full set of Client Hints.
+    String clientHintsSql =
+      "SELECT " +
+      "   t1.ua.DeviceClass                               AS DeviceClass,\n" +
+      "   t1.ua.DeviceName                                AS DeviceName,\n" +
+      "   t1.ua.DeviceBrand                               AS DeviceBrand,\n" +
+      "   t1.ua.DeviceCpu                                 AS DeviceCpu,\n" +
+      "   t1.ua.DeviceCpuBits                             AS DeviceCpuBits,\n" +
+      "   t1.ua.OperatingSystemClass                      AS OperatingSystemClass,\n" +
+      "   t1.ua.OperatingSystemName                       AS OperatingSystemName,\n" +
+      "   t1.ua.OperatingSystemVersion                    AS OperatingSystemVersion,\n" +
+      "   t1.ua.OperatingSystemVersionMajor               AS OperatingSystemVersionMajor,\n" +
+      "   t1.ua.OperatingSystemNameVersion                AS OperatingSystemNameVersion,\n" +
+      "   t1.ua.OperatingSystemNameVersionMajor           AS OperatingSystemNameVersionMajor,\n" +
+      "   t1.ua.LayoutEngineClass                         AS LayoutEngineClass,\n" +
+      "   t1.ua.LayoutEngineName                          AS LayoutEngineName,\n" +
+      "   t1.ua.LayoutEngineVersion                       AS LayoutEngineVersion,\n" +
+      "   t1.ua.LayoutEngineVersionMajor                  AS LayoutEngineVersionMajor,\n" +
+      "   t1.ua.LayoutEngineNameVersion                   AS LayoutEngineNameVersion,\n" +
+      "   t1.ua.LayoutEngineNameVersionMajor              AS LayoutEngineNameVersionMajor,\n" +
+      "   t1.ua.AgentClass                                AS AgentClass,\n" +
+      "   t1.ua.AgentName                                 AS AgentName,\n" +
+      "   t1.ua.AgentVersion                              AS AgentVersion,\n" +
+      "   t1.ua.AgentVersionMajor                         AS AgentVersionMajor,\n" +
+      "   t1.ua.AgentNameVersion                          AS AgentNameVersion,\n" +
+      "   t1.ua.AgentNameVersionMajor                     AS AgentNameVersionMajor\n" +
+      "FROM (" +
+      "   SELECT" +
+      "       parse_user_agent(" +
+      "           'User-Agent',                   'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'," +
+      "           'Sec-Ch-Ua',                    '\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"100\", \"Google Chrome\";v=\"100\"'," +
+      "           'Sec-Ch-Ua-Arch',               '\"x86\"'," +
+      "           'Sec-Ch-Ua-Bitness',            '\"64\"'," +
+      "           'Sec-Ch-Ua-Full-Version',       '\"100.0.4896.127\"'," +
+      "           'Sec-Ch-Ua-Full-Version-List',  '\" Not A;Brand\";v=\"99.0.0.0\", \"Chromium\";v=\"100.0.4896.127\", \"Google Chrome\";v=\"100.0.4896.127\"'," +
+      "           'Sec-Ch-Ua-Mobile',             '?0'," +
+      "           'Sec-Ch-Ua-Model',              '\"\"'," +
+      "           'Sec-Ch-Ua-Platform',           '\"Linux\"'," +
+      "           'Sec-Ch-Ua-Platform-Version',   '\"5.13.0\"'," +
+      "           'Sec-Ch-Ua-Wow64',              '?0'" +
+      "       ) AS ua " +
+      "   FROM (values(1))" +
+      ") AS t1";
+
+    // The OS version (5.13.0) is present only in the Sec-Ch-Ua-Platform-Version hint, not in the User-Agent string itself.
+    Map<String, Object> expectedRow = expectations(
+      "DeviceClass",                      "Desktop",
+      "DeviceName",                       "Linux Desktop",
+      "DeviceBrand",                      "Unknown",
+      "DeviceCpu",                        "Intel x86_64",
+      "DeviceCpuBits",                    "64",
+      "OperatingSystemClass",             "Desktop",
+      "OperatingSystemName",              "Linux",
+      "OperatingSystemVersion",           "5.13.0",
+      "OperatingSystemVersionMajor",      "5",
+      "OperatingSystemNameVersion",       "Linux 5.13.0",
+      "OperatingSystemNameVersionMajor",  "Linux 5",
+      "LayoutEngineClass",                "Browser",
+      "LayoutEngineName",                 "Blink",
+      "LayoutEngineVersion",              "100.0",
+      "LayoutEngineVersionMajor",         "100",
+      "LayoutEngineNameVersion",          "Blink 100.0",
+      "LayoutEngineNameVersionMajor",     "Blink 100",
+      "AgentClass",                       "Browser",
+      "AgentName",                        "Chrome",
+      "AgentVersion",                     "100.0.4896.127",
+      "AgentVersionMajor",                "100",
+      "AgentNameVersion",                 "Chrome 100.0.4896.127",
+      "AgentNameVersionMajor",            "Chrome 100"
+    );
+
+    testBuilder()
+      .sqlQuery(clientHintsSql)
+      .unOrdered()
+      .baselineRecords(Collections.singletonList(expectedRow)) // Singleton list because we expect 1 record
+      .go();
+  }
+
+  // ====================================================================
+
+  @Test
+  public void testEmptyClientHints() throws Exception {
+    // Every Client Hint header is supplied as an empty string, so only the User-Agent string carries information.
+    String emptyHintsSql =
+      "SELECT " +
+      "   t1.ua.DeviceClass                               AS DeviceClass,\n" +
+      "   t1.ua.DeviceName                                AS DeviceName,\n" +
+      "   t1.ua.DeviceBrand                               AS DeviceBrand,\n" +
+      "   t1.ua.DeviceCpu                                 AS DeviceCpu,\n" +
+      "   t1.ua.DeviceCpuBits                             AS DeviceCpuBits,\n" +
+      "   t1.ua.OperatingSystemClass                      AS OperatingSystemClass,\n" +
+      "   t1.ua.OperatingSystemName                       AS OperatingSystemName,\n" +
+      "   t1.ua.OperatingSystemVersion                    AS OperatingSystemVersion,\n" +
+      "   t1.ua.OperatingSystemVersionMajor               AS OperatingSystemVersionMajor,\n" +
+      "   t1.ua.OperatingSystemNameVersion                AS OperatingSystemNameVersion,\n" +
+      "   t1.ua.OperatingSystemNameVersionMajor           AS OperatingSystemNameVersionMajor,\n" +
+      "   t1.ua.LayoutEngineClass                         AS LayoutEngineClass,\n" +
+      "   t1.ua.LayoutEngineName                          AS LayoutEngineName,\n" +
+      "   t1.ua.LayoutEngineVersion                       AS LayoutEngineVersion,\n" +
+      "   t1.ua.LayoutEngineVersionMajor                  AS LayoutEngineVersionMajor,\n" +
+      "   t1.ua.LayoutEngineNameVersion                   AS LayoutEngineNameVersion,\n" +
+      "   t1.ua.LayoutEngineNameVersionMajor              AS LayoutEngineNameVersionMajor,\n" +
+      "   t1.ua.AgentClass                                AS AgentClass,\n" +
+      "   t1.ua.AgentName                                 AS AgentName,\n" +
+      "   t1.ua.AgentVersion                              AS AgentVersion,\n" +
+      "   t1.ua.AgentVersionMajor                         AS AgentVersionMajor,\n" +
+      "   t1.ua.AgentNameVersion                          AS AgentNameVersion,\n" +
+      "   t1.ua.AgentNameVersionMajor                     AS AgentNameVersionMajor\n" +
+      "FROM (" +
+      "   SELECT" +
+      "       parse_user_agent(" +
+      // NOTE: The "User-Agent" header name is deliberately left out here --> the value is simply the first one in the list.
+      "           'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36'," +
+      "           'Sec-Ch-Ua',                    ''," +
+      "           'Sec-Ch-Ua-Arch',               ''," +
+      "           'Sec-Ch-Ua-Bitness',            ''," +
+      "           'Sec-Ch-Ua-Full-Version',       ''," +
+      "           'Sec-Ch-Ua-Full-Version-List',  ''," +
+      "           'Sec-Ch-Ua-Mobile',             ''," +
+      "           'Sec-Ch-Ua-Model',              ''," +
+      "           'Sec-Ch-Ua-Platform',           ''," +
+      "           'Sec-Ch-Ua-Platform-Version',   ''," +
+      "           'Sec-Ch-Ua-Wow64',              ''" +
+      "       ) AS ua " +
+      "   FROM (values(1))" +
+      ") AS t1";
+
+    // The OS version is expected as "??": the User-Agent string does not contain one and all hints are empty.
+    Map<String, Object> expectedRow = expectations(
+      "DeviceClass",                      "Desktop",
+      "DeviceName",                       "Linux Desktop",
+      "DeviceBrand",                      "Unknown",
+      "DeviceCpu",                        "Intel x86_64",
+      "DeviceCpuBits",                    "64",
+      "OperatingSystemClass",             "Desktop",
+      "OperatingSystemName",              "Linux",
+      "OperatingSystemVersion",           "??",
+      "OperatingSystemVersionMajor",      "??",
+      "OperatingSystemNameVersion",       "Linux ??",
+      "OperatingSystemNameVersionMajor",  "Linux ??",
+      "LayoutEngineClass",                "Browser",
+      "LayoutEngineName",                 "Blink",
+      "LayoutEngineVersion",              "100.0",
+      "LayoutEngineVersionMajor",         "100",
+      "LayoutEngineNameVersion",          "Blink 100.0",
+      "LayoutEngineNameVersionMajor",     "Blink 100",
+      "AgentClass",                       "Browser",
+      "AgentName",                        "Chrome",
+      "AgentVersion",                     "100.0.4896.127",
+      "AgentVersionMajor",                "100",
+      "AgentNameVersion",                 "Chrome 100.0.4896.127",
+      "AgentNameVersionMajor",            "Chrome 100"
+    );
+
+    testBuilder()
+      .sqlQuery(emptyHintsSql)
+      .unOrdered()
+      .baselineRecords(Collections.singletonList(expectedRow)) // Singleton list because we expect 1 record
+      .go();
+  }
+
+  /**
+   * Builds one expected result record from a flat, readable list of alternating column names and values.
+   * @param strings Alternating entries: "key", "value", "key", "value", ... (the total count must be even)
+   * @return An insertion-ordered Map from ExpressionStringBuilder.toString(parsePath(key)) to the matching value.
+   */
+  private Map<String, Object> expectations(String... strings) {
+    assertEquals("The number of arguments for 'expectations' must be even", 0, strings.length % 2);
+
+    Map<String, Object> baseline = new LinkedHashMap<>();
+    for (int keyPos = 0; keyPos < strings.length; keyPos += 2) {
+      // The key is rendered via parsePath; presumably this matches how the test framework names result columns.
+      String column = ExpressionStringBuilder.toString(parsePath(strings[keyPos]));
+      baseline.put(column, strings[keyPos + 1]);
+    }
+    return baseline;
+  }
+
 }
diff --git a/pom.xml b/pom.xml
index bff9f8b6c7..10b692b8fe 100644
--- a/pom.xml
+++ b/pom.xml
@@ -134,8 +134,9 @@
     <xerces.version>2.12.2</xerces.version>
     <commons.configuration.version>1.10</commons.configuration.version>
     <commons.beanutils.version>1.9.4</commons.beanutils.version>
-    <httpdlog-parser.version>5.7</httpdlog-parser.version>
-    <yauaa.version>5.20</yauaa.version>
+    <httpdlog-parser.version>5.8</httpdlog-parser.version>
+    <yauaa.version>7.1.0</yauaa.version>
+    <log4j.version>2.17.2</log4j.version>
     <aircompressor.version>0.20</aircompressor.version>
     <iceberg.version>0.12.1</iceberg.version>
     <univocity-parsers.version>2.8.3</univocity-parsers.version>