You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2021/10/07 18:55:14 UTC
[pinot] branch master updated: Support JSON queries with top-level
array path expression. (#7511)
This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 57a1f3f Support JSON queries with top-level array path expression. (#7511)
57a1f3f is described below
commit 57a1f3fe9674c57f29c470e3613d68a294fd7604
Author: Amrish Lal <am...@gmail.com>
AuthorDate: Thu Oct 7 11:55:02 2021 -0700
Support JSON queries with top-level array path expression. (#7511)
* Support JSON queries with top-level array path expression.
* Cleanup.
* Properly detect top-level path expressions.
* add test case for top-level path expressions in JSON_MATCH
* optimize code.
* Map identifier 'column[0]' to json path '0' for compatiblity with JSON_MATCH.
* Rebuild.
* Rebuild.
* codereview changes.
* Rebuild.
* Update pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
Co-authored-by: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
---
.../statement/JsonStatementOptimizer.java | 36 +++++++++---
.../statement/JsonStatementOptimizerTest.java | 39 +++++++++++++
.../apache/pinot/queries/JsonPathQueriesTest.java | 68 ++++++++++++----------
3 files changed, 102 insertions(+), 41 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
index 258c796..d2f2198 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizer.java
@@ -24,7 +24,6 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import javax.annotation.Nullable;
-import org.apache.commons.lang.StringUtils;
import org.apache.pinot.common.function.scalar.ArithmeticFunctions;
import org.apache.pinot.common.function.scalar.DateTimeFunctions;
import org.apache.pinot.common.request.Expression;
@@ -384,13 +383,34 @@ public class JsonStatementOptimizer implements StatementOptimizer {
}
/**
- * @return A string array containing all the parts of an identifier. An identifier may have one or more parts that
- * are joined together using <DOT>. For example the identifier "testTable.jsonColumn.name.first" consists up of
- * "testTable" (name of table), "jsonColumn" (name of column), "name" (json path), and "first" (json path). The last
- * two parts when joined together (name.first) represent a JSON path expression.
+ * @return A two element String array where the first element is the column name and second element is the JSON
+ * path expression. If column name is not suffixed by JSON path expression, then array will contain only a single
+ * element representing the column name. For example:
+ * 1) Identifier "jsonColumn.name.first" -> {"jsonColumn", ".name.first"}
+ * 2) Identifier "jsonColumn[0]" -> {"jsonColumn", "[0]"}
+ * 3) Identifier "jsonColumn" -> {"jsonColumn"}
*/
private static String[] getIdentifierParts(Identifier identifier) {
- return StringUtils.split(identifier.getName(), '.');
+ String name = identifier.getName();
+ int dotIndex = name.indexOf('.');
+ int openBracketIndex = name.indexOf('[');
+
+ // column name followed by top-level array expression.
+ if (openBracketIndex != -1) {
+ // name has an '[', check if this path expression refers to a top-level JSON array.
+ if (dotIndex == -1 || openBracketIndex < dotIndex) {
+ // This path expression refers to a top-level JSON array.
+ return new String[]{name.substring(0, openBracketIndex), name.substring(openBracketIndex)};
+ }
+ }
+
+ // column name followed by all other JSON path expression
+ if (dotIndex != -1) {
+ return new String[] {name.substring(0, dotIndex), name.substring(dotIndex)};
+ }
+
+ // column name without any JSON path expression
+ return new String[] {name};
}
/**
@@ -407,9 +427,7 @@ public class JsonStatementOptimizer implements StatementOptimizer {
}
builder.append("$");
- for (int i = 1; i < parts.length; i++) {
- builder.append(".").append(parts[i]);
- }
+ builder.append(parts[1]);
if (applyDoubleQuote) {
builder.append("\"");
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
index 2339ad7..32280a4 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/query/optimizer/statement/JsonStatementOptimizerTest.java
@@ -200,4 +200,43 @@ public class JsonStatementOptimizerTest {
"SELECT MAX(JSON_EXTRACT_SCALAR(jsonColumn, '$.id', 'DOUBLE', '" + Double.NEGATIVE_INFINITY
+ "') - 5) AS \"max(minus(jsonColumn.id,'5'))\" FROM testTable", TABLE_CONFIG_WITH_INDEX, SCHEMA);
}
+
+ @Test
+ public void testTopLevelArrayPathExpressions() {
+ // SELECT using json path expression with top-level array addressing.
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[0] FROM testTable",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null') AS \"jsonColumn[0]\" FROM testTable",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[0].a FROM testTable",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0].a', 'STRING', 'null') AS \"jsonColumn[0].a\" FROM testTable",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ TestHelper.assertEqualsQuery("SELECT jsonColumn.a[0] FROM testTable",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$.a[0]', 'STRING', 'null') AS \"jsonColumn.a[0]\" FROM testTable",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[1].i2 FROM testTable WHERE jsonColumn[1].i2 IS NOT NULL",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[1].i2', 'STRING', 'null') AS \"jsonColumn[1].i2\" FROM testTable "
+ + "WHERE JSON_MATCH(jsonColumn, '\"$[1].i2\" IS NOT NULL')",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ // Use top-level array addressing in json path expression in JSON_EXTRACT_SCALAR filter.
+ TestHelper.assertEqualsQuery("SELECT * FROM testTable WHERE jsonColumn[2] IS NOT NULL and jsonColumn[2] = 'test'",
+ "SELECT * FROM testTable WHERE JSON_EXTRACT_SCALAR(jsonColumn, '$[2]', 'JSON', 'null') IS NOT NULL AND "
+ + "JSON_EXTRACT_SCALAR(jsonColumn, '$[2]', 'STRING', 'null') = 'test'",
+ TABLE_CONFIG_WITHOUT_INDEX, SCHEMA);
+
+ // Use top-level array addressing in json path expression in JSON_MATCH filter
+ TestHelper.assertEqualsQuery("SELECT * FROM testTable WHERE jsonColumn[2] IS NOT NULL and jsonColumn[2] = 'test'",
+ "SELECT * FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$[2]\" IS NOT NULL') AND JSON_MATCH(jsonColumn, "
+ + "'\"$[2]\" = ''test''')",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+
+ // Use top-level array addressing in json path expression in GROUP BY clause.
+ TestHelper.assertEqualsQuery("SELECT jsonColumn[0], count(*) FROM testTable GROUP BY jsonColumn[0]",
+ "SELECT JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null') AS \"jsonColumn[0]\", count(*) FROM "
+ + "testTable GROUP BY JSON_EXTRACT_SCALAR(jsonColumn, '$[0]', 'STRING', 'null')",
+ TABLE_CONFIG_WITH_INDEX, SCHEMA);
+ }
}
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
index 8eb4c8d..097e6d9 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonPathQueriesTest.java
@@ -141,6 +141,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
+ "[[\"a\",\"b\"],[\"c\",\"d\"]]]}"));
records.add(createRecord(13, 13, "days",
"{\"name\": {\"first\": \"multi-dimensional-1\",\"last\": \"array\"},\"days\": 111}"));
+ records.add(createRecord(14, 14, "top level array", "[{\"i1\":1,\"i2\":2}, {\"i1\":3,\"i2\":4}]"));
List<String> jsonIndexColumns = new ArrayList<>();
jsonIndexColumns.add("jsonColumn");
@@ -184,12 +185,10 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
// SELECT using a simple json path expression.
Object[][] expecteds1 = {{"duck"}, {"mouse"}, {"duck"}};
checkresult("SELECT jsonColumn.name.last FROM testTable LIMIT 3", expecteds1);
- //checkresult("SELECT jsonColumnWithoutIndex.name.last FROM testTable LIMIT 3", expecteds1);
Object[][] expecteds2 =
{{"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"null"}, {"1"}};
checkresult("SELECT jsonColumn.data[0].e[2].z[0].i1 FROM testTable", expecteds2);
- //checkresult("SELECT jsonColumnWithoutIndex.data[0].e[2].z[0].i1 FROM testTable", expecteds2);
}
/** Test that a predicate comparing a json path expression with literal is properly converted into a JSON_MATCH
@@ -197,24 +196,18 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
@Test
public void testJsonFilter() {
// Comparing json path expression with a string value.
- Object[][] expecteds1 = {
- {
- 1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
+ Object[][] expecteds1 =
+ {{1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}", 1L,
- "daffy duck"
- }
- };
+ "daffy duck"}};
checkresult("SELECT * FROM testTable WHERE jsonColumn.name.first = 'daffy' LIMIT 1", expecteds1);
checkresult("SELECT * FROM testTable WHERE jsonColumnWithoutIndex.name.first = 'daffy' LIMIT 1", expecteds1);
// Comparing json path expression with a numerical value.
- Object[][] expecteds2 = {
- {
- 1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
+ Object[][] expecteds2 =
+ {{1, "{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}",
"{\"name\":{\"first\":\"daffy\",\"last\":\"duck\"},\"id\":101,\"data\":[\"a\",\"b\",\"c\",\"d\"]}", 1L,
- "daffy duck"
- }
- };
+ "daffy duck"}};
checkresult("SELECT * FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$.id\" = 101') LIMIT 1", expecteds2);
try {
checkresult("SELECT * FROM testTable WHERE JSON_MATCH(jsonColumnWithoutIndex, '\"$.id\" = 101') LIMIT 1",
@@ -227,19 +220,16 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
// Comparing json path expression with a string value.
Object[][] expecteds3 = {{4L}};
- checkresult(
- "SELECT count(*) FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$.id\" IS NOT NULL') AND JSON_MATCH"
- + "(jsonColumn, '\"$.id\" = 101')",
- expecteds3);
+ checkresult("SELECT count(*) FROM testTable WHERE JSON_MATCH(jsonColumn, '\"$.id\" IS NOT NULL') AND JSON_MATCH"
+ + "(jsonColumn, '\"$.id\" = 101')", expecteds3);
}
/** Test that a json path expression in GROUP BY clause is properly converted into a JSON_EXTRACT_SCALAR function. */
@Test
public void testJsonGroupBy() {
- Object[][] expecteds1 = {
- {"111", 20L}, {"101", 4L}, {"null", 4L}, {"181", 4L}, {"161.5", 4L}, {"171", 4L}, {"161", 4L}, {"141", 4L},
- {"131", 4L}, {"121", 4L}
- };
+ Object[][] expecteds1 =
+ {{"111", 20L}, {"101", 4L}, {"null", 8L}, {"181", 4L}, {"161.5", 4L}, {"171", 4L}, {"161", 4L}, {"141", 4L},
+ {"131", 4L}, {"121", 4L}};
checkresult("SELECT jsonColumn.id, count(*) FROM testTable GROUP BY jsonColumn.id", expecteds1);
checkresult("SELECT jsonColumnWithoutIndex.id, count(*) FROM testTable GROUP BY jsonColumnWithoutIndex.id",
expecteds1);
@@ -251,12 +241,10 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
Object[][] expecteds1 = {{"mouse", 8L}};
checkresult(
"SELECT jsonColumn.name.last, count(*) FROM testTable GROUP BY jsonColumn.name.last HAVING jsonColumn.name"
- + ".last = 'mouse'",
- expecteds1);
+ + ".last = 'mouse'", expecteds1);
checkresult(
"SELECT jsonColumnWithoutIndex.name.last, count(*) FROM testTable GROUP BY jsonColumnWithoutIndex.name.last "
- + "HAVING jsonColumnWithoutIndex.name.last = 'mouse'",
- expecteds1);
+ + "HAVING jsonColumnWithoutIndex.name.last = 'mouse'", expecteds1);
}
/** Test a complex SQL statement with json path expression in SELECT, WHERE, and GROUP BY clauses. */
@@ -268,8 +256,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
expecteds1);
checkresult(
"SELECT jsonColumnWithoutIndex.name.last, count(*) FROM testTable WHERE jsonColumnWithoutIndex.id = 101 GROUP"
- + " BY jsonColumnWithoutIndex.name.last",
- expecteds1);
+ + " BY jsonColumnWithoutIndex.name.last", expecteds1);
}
/** Test an aggregation function over json path expression in SELECT clause. */
@@ -287,8 +274,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
try {
checkresult(
"SELECT FROMEPOCHDAYS(jsonColumnWithoutIndex.days) FROM testTable WHERE jsonColumnWithoutIndex.days IS NULL"
- + " LIMIT 1",
- expecteds2);
+ + " LIMIT 1", expecteds2);
Assert.fail();
} catch (BadQueryRequestException e) {
Assert
@@ -302,8 +288,7 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
try {
checkresult(
"SELECT FROMEPOCHDAYS(jsonColumnWithoutIndex.days) FROM testTable WHERE jsonColumnWithoutIndex.days IS NOT "
- + "NULL LIMIT 1",
- expecteds3);
+ + "NULL LIMIT 1", expecteds3);
Assert.fail();
} catch (BadQueryRequestException e) {
Assert.assertEquals(e.getMessage(),
@@ -330,4 +315,23 @@ public class JsonPathQueriesTest extends BaseQueriesTest {
checkresult("SELECT MAX(jsonColumn.id - 5) FROM testTable", expecteds3);
checkresult("SELECT MAX(jsonColumnWithoutIndex.id - 5) FROM testTable", expecteds3);
}
+
+ @Test
+ public void testTopLevelArrayPathExpressions() {
+ // SELECT using json path expressions that refers to second element of a top-level array.
+ Object[][] expecteds1 = {{"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,\"i2\":4}"}, {"{\"i1\":3,"
+ + "\"i2\":4}"}};
+ checkresult("SELECT jsonColumn[1] FROM testTable WHERE intColumn=14", expecteds1);
+
+ // SELECT using json path expressions that refers to item within second element of a top-level array.
+ Object[][] expecteds2 = {{"4"}, {"4"}, {"4"}, {"4"}};
+ checkresult("SELECT jsonColumn[1].i2 FROM testTable WHERE intColumn=14", expecteds2);
+
+ // SELECT using json path expression and check path expression for IS NULL.
+ checkresult("SELECT jsonColumn[1].i2 FROM testTable WHERE jsonColumn[1].i2 IS NOT NULL", expecteds2);
+
+ // GROUP BY using a json path expression that refers to a top-level array element.
+ Object[][] expecteds3 = {{"{\"i1\":3,\"i2\":4}", 4L}, {"null", 56L}};
+ checkresult("SELECT jsonColumn[1], count(*) FROM testTable GROUP BY jsonColumn[1]", expecteds3);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org