You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2021/10/07 18:55:14 UTC

[pinot] branch master updated: Support JSON queries with top-level array path expression. (#7511)

This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 57a1f3f  Support JSON queries with top-level array path expression. (#7511)
57a1f3f is described below

commit 57a1f3fe9674c57f29c470e3613d68a294fd7604
Author: Amrish Lal <am...@gmail.com>
AuthorDate: Thu Oct 7 11:55:02 2021 -0700

    Support JSON queries with top-level array path expression. (#7511)
    
    * Support JSON queries with top-level array path expression.
    
    * Cleanup.
    
    * Properly detect top-level path expressions.
    
    * add test case for top-level path expressions in JSON_MATCH
    
    * optimize code.
    
    * Map identifier 'column[0]' to json path '0' for compatiblity with JSON_MATCH.
    
    * Rebuild.
    
    * Rebuild.
    
    * codereview changes.
    
    * Rebuild.
    
    * Update pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
    
    Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
    
    Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
---
 .../statement/JsonStatementOptimizer.java          | 36 +++++++++---
 .../statement/JsonStatementOptimizerTest.java      | 39 +++++++++++++
 .../apache/pinot/queries/JsonPathQueriesTest.java  | 68 ++++++++++++----------
 3 files changed, 102 insertions(+), 41 deletions(-)

diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
index 258c796..d2f2198 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
@@ -24,7 +24,6 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import javax.annotation.Nullable;
-import org.apache.commons.lang.StringUtils;
 import org.apache.pinot.common.function.scalar.ArithmeticFunctions;
 import org.apache.pinot.common.function.scalar.DateTimeFunctions;
 import org.apache.pinot.common.request.Expression;
@@ -384,13 +383,34 @@ public class JsonStatementOptimizer implements StatementOptimizer {
   }
 
   /**
-   *  @return A string array containing all the parts of an identifier. An identifier may have one or more parts that
-   *  are joined together using <DOT>. For example the identifier "testTable.jsonColumn.name.first" consists up of
-   *  "testTable" (name of table), "jsonColumn" (name of column), "name" (json path), and "first" (json path). The last
-   *  two parts when joined together (name.first) represent a JSON path expression.
+   * @return A two element String array where the first element is the column name and second element is the JSON
+   * path expression. If column name is not suffixed by JSON path expression, then array will contain only a single
+   * element representing the column name. For example:
+   * 1) Identifier "jsonColumn.name.first" -> {"jsonColumn", ".name.first"}
+   * 2) Identifier "jsonColumn[0]" -> {"jsonColumn", "[0]"}
+   * 3) Identifier "jsonColumn" -> {"jsonColumn"}
    */
   private static String[] getIdentifierParts(Identifier identifier) {
-    return StringUtils.split(identifier.getName(), '.');
+    String name = identifier.getName();
+    int dotIndex = name.indexOf('.');
+    int openBracketIndex = name.indexOf('[');
+
+    // column name followed by top-level array expression.
+    if (openBracketIndex != -1) {
+      // name has an '[', check if this path expression refers to a top-level JSON array.
+      if (dotIndex == -1 || openBracketIndex < dotIndex) {
+        // This path expression refers to a top-level JSON array.
+        return new String[]{name.substring(0, openBracketIndex), name.substring(openBracketIndex)};
+      }
+    }
+
+    // column name followed by all other JSON path expression
+    if (dotIndex != -1) {
+      return new String[] {name.substring(0, dotIndex), name.substring(dotIndex)};
+    }
+
+    // column name without any JSON path expression
+    return new String[] {name};
   }
 
   /**
@@ -407,9 +427,7 @@ public class JsonStatementOptimizer implements StatementOptimizer {
     }
 
     builder.append("$");
-    for (int i = 1; i < parts.length; i++) {
-      builder.append(".").append(parts[i]);
-    }
+    builder.append(parts[1]);
 
     if (applyDoubleQuote) {
       builder.append("\"");
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
index 2339ad7..32280a4 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
@@ -200,4 +200,43 @@ public class JsonStatementOptimizerTest {
         "SELECT MAX(JSON_EXTRACT_SCALAR(jsonColumn, '$.id', 'DOUBLE', '" + Double.NEGATIVE_INFINITY
             + "') - 5) AS \"max(minus(jsonColumn.id,'5'))\" FROM testTable", TABLE_CONFIG_WITH_INDEX, SCHEMA);
   }
+
+  @Test
+  public void testTopLevelArrayPathExpressions() {
+    // SELECT using json path expression with top-level array addressing.
+    TestHelper.assertEqualsQuery("SELECT jsonColumn[0] FROM testTable",
+        "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null') AS \"jsonColumn[0]\" FROM testTable",
+        TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+    TestHelper.assertEqualsQuery("SELECT jsonColumn[0].a FROM testTable",
+        "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0].a', 'STRING', 'null') AS \"jsonColumn[0].a\" FROM testTable",
+        TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+    TestHelper.assertEqualsQuery("SELECT jsonColumn.a[0] FROM testTable",
+        "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$.a[0]', 'STRING', 'null') AS \"jsonColumn.a[0]\" FROM testTable",
+        TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+    TestHelper.assertEqualsQuery("SELECT jsonColumn[1].i2 FROM testTable WHERE jsonColumn[1].i2 IS NOT NULL",
+        "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[1].i2', 'STRING', 'null') AS \"jsonColumn[1].i2\" FROM testTable "
+            + "WHERE JSON_MATCH(jsonColumn, '\"$[1].i2\" IS NOT NULL')",
+        TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+    // Use top-level array addressing in json path expression in JSON_EXTRACT_SCALAR filter.
+    TestHelper.assertEqualsQuery("SELECT * FROM testTable WHERE jsonColumn[2] IS NOT NULL and jsonColumn[2] = 'test'",
+        "SELECT * FROM testTable WHERE JSON_EXTRACT_SCALAR(jsonColumn, '$[2]', 'JSON', 'null') IS NOT NULL AND "
+            + "JSON_EXTRACT_SCALAR(jsonColumn, '$[2]', 'STRING', 'null') = 'test'",
+        TABLE_CONFIG_WITHOUT_INDEX, SCHEMA);
+
+    // Use top-level array addressing in json path expression in JSON_MATCH filter
+    TestHelper.assertEqualsQuery("SELECT * FROM testTable WHERE jsonColumn[2] IS NOT NULL and jsonColumn[2] = 'test'",
+        "SELECT * FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$[2]\" IS NOT NULL') AND JSON_MATCH(jsonColumn, "
+            + "'\"$[2]\" = ''test''')",
+        TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+    // Use top-level array addressing in json path expression in GROUP BY clause.
+    TestHelper.assertEqualsQuery("SELECT jsonColumn[0], count(*) FROM testTable GROUP BY jsonColumn[0]",
+        "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null') AS \"jsonColumn[0]\", count(*) FROM "
+            + "testTable GROUP BY JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null')",
+        TABLE_CONFIG_WITH_INDEX, SCHEMA);
+  }
 }
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
index 8eb4c8d..097e6d9 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
@@ -141,6 +141,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
             + "[[\"a\",\"b\"],[\"c\",\"d\"]]]}"));
     records.add(createRecord(13, 13, "days",
         "{\"name\": {\"first\": \"multi-dimensional-1\",\"last\": \"array\"},\"days\": 111}"));
+    records.add(createRecord(14, 14, "top level array", "[{\"i1\":1,\"i2\":2}, {\"i1\":3,\"i2\":4}]"));
 
     List<String> jsonIndexColumns = new ArrayList<>();
     jsonIndexColumns.add("jsonColumn");
@@ -184,12 +185,10 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
     // SELECT using a simple json path expression.
     Object[][] expecteds1 = {{"duck"}, {"mouse"}, {"duck"}};
     checkresult("SELECT jsonColumn.name.last FROM testTable LIMIT 3", expecteds1);
-    //checkresult("SELECT jsonColumnWithoutIndex.name.last FROM testTable LIMIT 3", expecteds1);
 
     Object[][] expecteds2 =
         {{"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"1"}};
     checkresult("SELECT jsonColumn.data[0].e[2].z[0].i1 FROM testTable", expecteds2);
-    //checkresult("SELECT jsonColumnWithoutIndex.data[0].e[2].z[0].i1 FROM testTable", expecteds2);
   }
 
   /** Test that a predicate comparing a json path expression with literal is properly converted into a JSON_MATCH
@@ -197,24 +196,18 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
   @Test
   public void testJsonFilter() {
     // Comparing json path expression with a string value.
-    Object[][] expecteds1 = {
-        {
-            1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
+    Object[][] expecteds1 =
+        {{1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
             "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}", 1L,
-            "daffy duck"
-        }
-    };
+            "daffy duck"}};
     checkresult("SELECT * FROM testTable WHERE jsonColumn.name.first = 'daffy' LIMIT 1", expecteds1);
     checkresult("SELECT * FROM testTable WHERE jsonColumnWithoutIndex.name.first = 'daffy' LIMIT 1", expecteds1);
 
     // Comparing json path expression with a numerical value.
-    Object[][] expecteds2 = {
-        {
-            1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
+    Object[][] expecteds2 =
+        {{1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
             "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}", 1L,
-            "daffy duck"
-        }
-    };
+            "daffy duck"}};
     checkresult("SELECT * FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$.id\" = 101') LIMIT 1", expecteds2);
     try {
       checkresult("SELECT * FROM testTable WHERE JSON_MATCH(jsonColumnWithoutIndex, '\"$.id\" = 101') LIMIT 1",
@@ -227,19 +220,16 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
 
     // Comparing json path expression with a string value.
     Object[][] expecteds3 = {{4L}};
-    checkresult(
-        "SELECT count(*) FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$.id\" IS NOT NULL') AND JSON_MATCH"
-            + "(jsonColumn, '\"$.id\" = 101')",
-        expecteds3);
+    checkresult("SELECT count(*) FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$.id\" IS NOT NULL') AND JSON_MATCH"
+        + "(jsonColumn, '\"$.id\" = 101')", expecteds3);
   }
 
   /** Test that a json path expression in GROUP BY clause is properly converted into a JSON_EXTRACT_SCALAR function. */
   @Test
   public void testJsonGroupBy() {
-    Object[][] expecteds1 = {
-        {"111", 20L}, {"101", 4L}, {"null", 4L}, {"181", 4L}, {"161.5", 4L}, {"171", 4L}, {"161", 4L}, {"141", 4L},
-        {"131", 4L}, {"121", 4L}
-    };
+    Object[][] expecteds1 =
+        {{"111", 20L}, {"101", 4L}, {"null", 8L}, {"181", 4L}, {"161.5", 4L}, {"171", 4L}, {"161", 4L}, {"141", 4L},
+            {"131", 4L}, {"121", 4L}};
     checkresult("SELECT jsonColumn.id, count(*) FROM testTable GROUP BY jsonColumn.id", expecteds1);
     checkresult("SELECT jsonColumnWithoutIndex.id, count(*) FROM testTable GROUP BY jsonColumnWithoutIndex.id",
         expecteds1);
@@ -251,12 +241,10 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
     Object[][] expecteds1 = {{"mouse", 8L}};
     checkresult(
         "SELECT jsonColumn.name.last, count(*) FROM testTable GROUP BY jsonColumn.name.last HAVING jsonColumn.name"
-            + ".last = 'mouse'",
-        expecteds1);
+            + ".last = 'mouse'", expecteds1);
     checkresult(
         "SELECT jsonColumnWithoutIndex.name.last, count(*) FROM testTable GROUP BY jsonColumnWithoutIndex.name.last "
-            + "HAVING jsonColumnWithoutIndex.name.last = 'mouse'",
-        expecteds1);
+            + "HAVING jsonColumnWithoutIndex.name.last = 'mouse'", expecteds1);
   }
 
   /** Test a complex SQL statement with json path expression in SELECT, WHERE, and GROUP BY clauses. */
@@ -268,8 +256,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
         expecteds1);
     checkresult(
         "SELECT jsonColumnWithoutIndex.name.last, count(*) FROM testTable WHERE jsonColumnWithoutIndex.id = 101 GROUP"
-            + " BY jsonColumnWithoutIndex.name.last",
-        expecteds1);
+            + " BY jsonColumnWithoutIndex.name.last", expecteds1);
   }
 
   /** Test an aggregation function over json path expression in SELECT clause. */
@@ -287,8 +274,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
     try {
       checkresult(
           "SELECT FROMEPOCHDAYS(jsonColumnWithoutIndex.days) FROM testTable WHERE jsonColumnWithoutIndex.days IS NULL"
-              + " LIMIT 1",
-          expecteds2);
+              + " LIMIT 1", expecteds2);
       Assert.fail();
     } catch (BadQueryRequestException e) {
       Assert
@@ -302,8 +288,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
     try {
       checkresult(
           "SELECT FROMEPOCHDAYS(jsonColumnWithoutIndex.days) FROM testTable WHERE jsonColumnWithoutIndex.days IS NOT "
-              + "NULL LIMIT 1",
-          expecteds3);
+              + "NULL LIMIT 1", expecteds3);
       Assert.fail();
     } catch (BadQueryRequestException e) {
       Assert.assertEquals(e.getMessage(),
@@ -330,4 +315,23 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
     checkresult("SELECT MAX(jsonColumn.id - 5) FROM testTable", expecteds3);
     checkresult("SELECT MAX(jsonColumnWithoutIndex.id - 5) FROM testTable", expecteds3);
   }
+
+  @Test
+  public void testTopLevelArrayPathExpressions() {
+    // SELECT using json path expressions that refers to second element of a top-level array.
+    Object[][] expecteds1 = {{"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,"
+        + "\"i2\":4}"}};
+    checkresult("SELECT jsonColumn[1] FROM testTable WHERE intColumn=14", expecteds1);
+
+    // SELECT using json path expressions that refers to item within second element of a top-level array.
+    Object[][] expecteds2 = {{"4"}, {"4"}, {"4"}, {"4"}};
+    checkresult("SELECT jsonColumn[1].i2 FROM testTable WHERE intColumn=14", expecteds2);
+
+    // SELECT using json path expression and check path expression for IS NULL.
+    checkresult("SELECT jsonColumn[1].i2 FROM testTable WHERE jsonColumn[1].i2 IS NOT NULL", expecteds2);
+
+    // GROUP BY using a json path expression that refers to a top-level array element.
+    Object[][] expecteds3 = {{"{\"i1\":3,\"i2\":4}", 4L}, {"null", 56L}};
+    checkresult("SELECT jsonColumn[1], count(*) FROM testTable GROUP BY jsonColumn[1]", expecteds3);
+  }
 }

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org