You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by dz...@apache.org on 2022/12/22 13:05:37 UTC

[drill] 06/13: DRILL-8296: Possible type mismatch bug in SplunkBatchReader (#2700)

This is an automated email from the ASF dual-hosted git repository.

dzamo pushed a commit to branch 1.20
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 721e2a40a09a3acae25dc41ae019698b3a377b5c
Author: James Turton <91...@users.noreply.github.com>
AuthorDate: Thu Nov 3 17:07:11 2022 +0200

    DRILL-8296: Possible type mismatch bug in SplunkBatchReader (#2700)
---
 .../drill/exec/store/splunk/SplunkBatchReader.java | 29 ++------------
 .../exec/store/splunk/SplunkQueryBuilder.java      | 45 ++++++++--------------
 .../drill/exec/store/splunk/SplunkUtils.java       | 24 ++++++------
 .../exec/store/splunk/SplunkTestSplunkUtils.java   | 16 ++++----
 4 files changed, 37 insertions(+), 77 deletions(-)

diff --git a/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkBatchReader.java b/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkBatchReader.java
index 9932801936..bab6df37bd 100644
--- a/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkBatchReader.java
+++ b/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkBatchReader.java
@@ -35,7 +35,6 @@ import org.apache.drill.exec.physical.resultSet.RowSetLoader;
 import org.apache.drill.exec.record.metadata.SchemaBuilder;
 import org.apache.drill.exec.record.metadata.TupleMetadata;
 import org.apache.drill.exec.store.base.filter.ExprNode;
-import org.apache.drill.exec.util.Utilities;
 import org.apache.drill.exec.vector.ValueVector;
 import org.apache.drill.exec.vector.accessor.ScalarWriter;
 import org.apache.drill.shaded.guava.com.google.common.base.Stopwatch;
@@ -213,28 +212,6 @@ public class SplunkBatchReader implements ManagedReader<SchemaNegotiator> {
     return true;
   }
 
-  /**
-   * Checks to see whether the query is a star query. For our purposes, the star query is
-   * anything that contains only the ** and the SPECIAL_COLUMNS which are not projected.
-   * @return true if it is a star query, false if not.
-   */
-  private boolean isStarQuery() {
-    if (Utilities.isStarQuery(projectedColumns)) {
-      return true;
-    }
-
-    List<SplunkUtils.SPECIAL_FIELDS> specialFields = Arrays.asList(SplunkUtils.SPECIAL_FIELDS.values());
-
-    for (SchemaPath path: projectedColumns) {
-      if (path.nameEquals("**")) {
-        return true;
-      } else {
-        return specialFields.contains(path.getAsNamePart());
-      }
-    }
-    return false;
-  }
-
   /**
    * Determines whether a field is a Splunk multifield.
    * @param fieldValue The field to be tested
@@ -312,9 +289,9 @@ public class SplunkBatchReader implements ManagedReader<SchemaNegotiator> {
       filters.remove("sourcetype");
     }
 
-    // Pushdown the selected fields for non star queries.
-    if (! isStarQuery()) {
-      builder.addField(projectedColumns);
+    // Add projected columns, skipping star and specials.
+    for (SchemaPath projectedColumn: projectedColumns) {
+      builder.addField(projectedColumn.getAsUnescapedPath());
     }
 
     // Apply filters
diff --git a/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkQueryBuilder.java b/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkQueryBuilder.java
index c13c08e454..0d83fddee9 100644
--- a/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkQueryBuilder.java
+++ b/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkQueryBuilder.java
@@ -23,7 +23,6 @@ import org.apache.drill.exec.store.base.filter.ExprNode;
 import org.apache.drill.exec.store.base.filter.RelOp;
 import org.apache.drill.shaded.guava.com.google.common.base.Strings;
 
-import java.util.List;
 import java.util.Map;
 
 public class SplunkQueryBuilder {
@@ -36,7 +35,7 @@ public class SplunkQueryBuilder {
 
   private String query;
   private String sourceTypes;
-  private String fieldList;
+  private String concatenatedFields;
   private String filters;
   private int sourcetypeCount;
   private int limit;
@@ -69,35 +68,21 @@ public class SplunkQueryBuilder {
    * Splunk accepts arguments in the format | fields foo, bar, car.  This function adds these fields to the query.
    * As an error preventative measure, this function will ignore ** from Drill.
    * @param field The field to be added to the query
+   * @return true if the field was added, false if it was skipped
    */
-  public void addField (String field) {
+  public boolean addField(String field) {
     // Double Star fields cause errors and we will not add to the field list
-    if (field.equalsIgnoreCase("**") || Strings.isNullOrEmpty(field)) {
-      return;
+    if (SchemaPath.DYNAMIC_STAR.equals(field) || SplunkUtils.SPECIAL_FIELDS.includes(field)) {
+      return false;
     }
 
     // Case for first field
-    if (fieldList == null) {
-      this.fieldList = field;
+    if (concatenatedFields == null) {
+      this.concatenatedFields = field;
     } else {
-      this.fieldList += "," + field;
-    }
-  }
-
-  /**
-   * Creates the field list of r
-   * As an error preventative measure, this function will ignore ** from Drill.
-   * @param columnList SchemaPath of columns to be added to the field list
-   */
-  public void addField (List<SchemaPath> columnList) {
-    for (SchemaPath column : columnList) {
-      String columnName = column.getAsUnescapedPath();
-      if (columnName.equalsIgnoreCase("**") || Strings.isNullOrEmpty(columnName)) {
-        continue;
-      } else {
-        addField(columnName);
-      }
+      this.concatenatedFields += "," + field;
     }
+    return true;
   }
 
   /**
@@ -153,7 +138,7 @@ public class SplunkQueryBuilder {
       String value = ((ExprNode.ColRelOpConstNode)filter.getValue()).value.value.toString();
 
       // Ignore special cases
-      if (SplunkUtils.isSpecialField(fieldName)) {
+      if (SplunkUtils.SPECIAL_FIELDS.includes(fieldName)) {
         // Sourcetypes are a special case and can be added via filter pushdown
         if (fieldName.equalsIgnoreCase("sourcetype")) {
           addSourceType(value);
@@ -226,8 +211,8 @@ public class SplunkQueryBuilder {
     }
 
     // Add fields
-    if (! Strings.isNullOrEmpty(fieldList)) {
-      query += " | fields " + fieldList;
+    if (! Strings.isNullOrEmpty(concatenatedFields)) {
+      query += " | fields " + concatenatedFields;
     }
 
     // Add limit
@@ -236,10 +221,10 @@ public class SplunkQueryBuilder {
     }
 
     // Add table logic. This tells Splunk to return the data in tabular form rather than the mess that it usually generates
-    if ( Strings.isNullOrEmpty(fieldList)) {
-      fieldList = "*";
+    if ( Strings.isNullOrEmpty(concatenatedFields)) {
+      concatenatedFields = "*";
     }
-    query += " | table " + fieldList;
+    query += " | table " + concatenatedFields;
 
     return query;
   }
diff --git a/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkUtils.java b/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkUtils.java
index f34f64dcff..a859f2cf5a 100644
--- a/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkUtils.java
+++ b/contrib/storage-splunk/src/main/java/org/apache/drill/exec/store/splunk/SplunkUtils.java
@@ -18,6 +18,8 @@
 
 package org.apache.drill.exec.store.splunk;
 
+import java.util.Arrays;
+
 public class SplunkUtils {
   /**
    * These are special fields that alter the queries sent to Splunk.
@@ -46,19 +48,15 @@ public class SplunkUtils {
       this.field = field;
     }
 
-  }
-
-  /**
-   * Indicates whether the field in question is a special field and should be pushed down to the query or not.
-   * @param unknownField The field to be pushed down
-   * @return true if the field is a special field, false if not.
-   */
-  public static boolean isSpecialField (String unknownField) {
-    for (SPECIAL_FIELDS specialFieldName : SPECIAL_FIELDS.values()) {
-      if (specialFieldName.field.equalsIgnoreCase(unknownField)) {
-        return true;
-      }
+    /**
+     * Indicates whether the field in question is a special field and should be
+     * pushed down to the query or not.
+     * @param field The field name to test
+     * @return true if the field is a special field, false if not.
+     */
+    public static boolean includes(String field) {
+      return Arrays.stream(SplunkUtils.SPECIAL_FIELDS.values())
+        .anyMatch(special -> field.equals(special.name()));
     }
-    return false;
   }
 }
diff --git a/contrib/storage-splunk/src/test/java/org/apache/drill/exec/store/splunk/SplunkTestSplunkUtils.java b/contrib/storage-splunk/src/test/java/org/apache/drill/exec/store/splunk/SplunkTestSplunkUtils.java
index f60dafbcb1..a4a2e643ac 100644
--- a/contrib/storage-splunk/src/test/java/org/apache/drill/exec/store/splunk/SplunkTestSplunkUtils.java
+++ b/contrib/storage-splunk/src/test/java/org/apache/drill/exec/store/splunk/SplunkTestSplunkUtils.java
@@ -31,17 +31,17 @@ public class SplunkTestSplunkUtils {
 
   @Test
   public void testIsSpecialField() {
-    assertTrue(SplunkUtils.isSpecialField("sourcetype"));
-    assertTrue(SplunkUtils.isSpecialField("earliestTime"));
-    assertTrue(SplunkUtils.isSpecialField("latestTime"));
-    assertTrue(SplunkUtils.isSpecialField("spl"));
+    assertTrue(SplunkUtils.SPECIAL_FIELDS.includes("sourcetype"));
+    assertTrue(SplunkUtils.SPECIAL_FIELDS.includes("earliestTime"));
+    assertTrue(SplunkUtils.SPECIAL_FIELDS.includes("latestTime"));
+    assertTrue(SplunkUtils.SPECIAL_FIELDS.includes("spl"));
   }
 
   @Test
   public void testIsNotSpecialField() {
-    assertFalse(SplunkUtils.isSpecialField("bob"));
-    assertFalse(SplunkUtils.isSpecialField("ip_address"));
-    assertFalse(SplunkUtils.isSpecialField("mac_address"));
-    assertFalse(SplunkUtils.isSpecialField("latest_Time"));
+    assertFalse(SplunkUtils.SPECIAL_FIELDS.includes("bob"));
+    assertFalse(SplunkUtils.SPECIAL_FIELDS.includes("ip_address"));
+    assertFalse(SplunkUtils.SPECIAL_FIELDS.includes("mac_address"));
+    assertFalse(SplunkUtils.SPECIAL_FIELDS.includes("latest_Time"));
   }
 }