You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2021/05/08 02:52:53 UTC
[incubator-pinot] branch master updated: JSON column datatype
support. (#6878)
This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new bd4239f JSON column datatype support. (#6878)
bd4239f is described below
commit bd4239fc6908096f60ead9f1ee2c3576f256618b
Author: Amrish Lal <am...@gmail.com>
AuthorDate: Fri May 7 19:52:24 2021 -0700
JSON column datatype support. (#6878)
* JSON column datatype support.
* code review changes + additional test cases.
* code review changes.
* Cleanup.
* Rebuild.
* Adjust for UTC based timestamp.
* Adjust for UTC based timestamp.
* Adjust for UTC based timestamp.
* Cleanup.
* code review changes.
* Fix test case.
* add TODO comment.
* add TODO comment.
* Rebuild.
* Rebuild.
---
.../org/apache/pinot/common/utils/DataSchema.java | 9 ++
.../apache/pinot/common/utils/PinotDataType.java | 86 ++++++++++++++++++-
.../apache/pinot/common/data/FieldSpecTest.java | 12 +++
.../pinot/common/utils/PinotDataTypeTest.java | 98 ++++++++++++++++------
.../transform/function/BaseTransformFunction.java | 2 +
.../transform/function/CastTransformFunction.java | 3 +
...tchPredicateTest.java => JsonDatatypeTest.java} | 58 +++++--------
.../pinot/queries/JsonMatchPredicateTest.java | 3 +
.../ColumnMinMaxValueGenerator.java | 8 +-
.../recordtransformer/RecordTransformerTest.java | 7 ++
.../java/org/apache/pinot/spi/data/FieldSpec.java | 9 ++
.../java/org/apache/pinot/spi/data/Schema.java | 1 +
.../java/org/apache/pinot/spi/utils/JsonUtils.java | 1 +
13 files changed, 230 insertions(+), 67 deletions(-)
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
index 409fd80..b871c0e 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/DataSchema.java
@@ -243,6 +243,7 @@ public class DataSchema {
BOOLEAN /* Stored as INT */,
TIMESTAMP /* Stored as LONG */,
STRING,
+ JSON /* Stored as STRING */,
BYTES,
OBJECT,
INT_ARRAY,
@@ -260,6 +261,8 @@ public class DataSchema {
return INT;
case TIMESTAMP:
return LONG;
+ case JSON:
+ return STRING;
default:
return this;
}
@@ -308,6 +311,8 @@ public class DataSchema {
return DataType.TIMESTAMP;
case STRING:
return DataType.STRING;
+ case JSON:
+ return DataType.JSON;
case BYTES:
return DataType.BYTES;
default:
@@ -334,6 +339,7 @@ public class DataSchema {
case TIMESTAMP:
return new Timestamp((Long) value);
case STRING:
+ case JSON:
return value.toString();
case BYTES:
return ((ByteArray) value).getBytes();
@@ -421,6 +427,7 @@ public class DataSchema {
case TIMESTAMP:
return new Timestamp((Long) value).toString();
case STRING:
+ case JSON:
return value.toString();
case BYTES:
return ((ByteArray) value).toHexString();
@@ -495,6 +502,8 @@ public class DataSchema {
return TIMESTAMP;
case STRING:
return STRING;
+ case JSON:
+ return JSON;
case BYTES:
return BYTES;
default:
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java b/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java
index ee9b02e..43eaf30 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/utils/PinotDataType.java
@@ -19,12 +19,14 @@
package org.apache.pinot.common.utils;
import java.sql.Timestamp;
+import java.util.Base64;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.utils.BooleanUtils;
import org.apache.pinot.spi.utils.BytesUtils;
+import org.apache.pinot.spi.utils.JsonUtils;
import org.apache.pinot.spi.utils.TimestampUtils;
@@ -538,6 +540,60 @@ public enum PinotDataType {
}
},
+ JSON {
+ @Override
+ public int toInt(Object value) {
+ return Integer.parseInt(value.toString().trim());
+ }
+
+ @Override
+ public long toLong(Object value) {
+ return Long.parseLong(value.toString().trim());
+ }
+
+ @Override
+ public float toFloat(Object value) {
+ return Float.parseFloat(value.toString());
+ }
+
+ @Override
+ public double toDouble(Object value) {
+ return Double.parseDouble(value.toString());
+ }
+
+ @Override
+ public boolean toBoolean(Object value) {
+ return Boolean.parseBoolean(value.toString().trim());
+ }
+
+ @Override
+ public Timestamp toTimestamp(Object value) {
+ return TimestampUtils.toTimestamp(value.toString().trim());
+ }
+
+ @Override
+ public String toString(Object value) {
+ return value.toString();
+ }
+
+ @Override
+ public byte[] toBytes(Object value) {
+ // Base64 encoding is the commonly used mechanism for encoding binary data in JSON documents. Note that
+ // toJson function converts byte[] into a Base64 encoded json string value.
+ try {
+ return Base64.getDecoder().decode(value.toString());
+ } catch (Exception e) {
+ throw new RuntimeException(
+ "Unable to convert JSON base64 encoded string value to BYTES. Input value: " + value, e);
+ }
+ }
+
+ @Override
+ public String convert(Object value, PinotDataType sourceType) {
+ return sourceType.toJson(value);
+ }
+ },
+
BYTES {
@Override
public int toInt(Object value) {
@@ -717,7 +773,8 @@ public enum PinotDataType {
OBJECT_ARRAY;
/**
- * NOTE: override toInt(), toLong(), toFloat(), toDouble(), toBoolean(), toTimestamp(), toString() and toBytes() for single-value types.
+ * NOTE: override toInt(), toLong(), toFloat(), toDouble(), toBoolean(), toTimestamp(), toString(), and
+ * toBytes() for single-value types.
*/
public int toInt(Object value) {
@@ -748,6 +805,23 @@ public enum PinotDataType {
return getSingleValueType().toString(toObjectArray(value)[0]);
}
+
+ public String toJson(Object value) {
+ if (value instanceof String) {
+ try {
+ return JsonUtils.stringToJsonNode((String) value).toString();
+ } catch (Exception e) {
+ throw new RuntimeException("Unable to convert String into JSON. Input value: " + value, e);
+ }
+ } else {
+ try {
+ return JsonUtils.objectToString(value).toString();
+ } catch (Exception e) {
+ throw new RuntimeException("Unable to convert " + value.getClass().getCanonicalName() + " to JSON. Input value: " + value, e);
+ }
+ }
+ }
+
public byte[] toBytes(Object value) {
return getSingleValueType().toBytes(toObjectArray(value)[0]);
}
@@ -936,7 +1010,7 @@ public enum PinotDataType {
}
public Object convert(Object value, PinotDataType sourceType) {
- throw new UnsupportedOperationException("Cannot convert value form " + sourceType + " to " + this);
+ throw new UnsupportedOperationException("Cannot convert value from " + sourceType + " to " + this);
}
/**
@@ -1011,6 +1085,12 @@ public enum PinotDataType {
} else {
throw new IllegalStateException("There is no multi-value type for TIMESTAMP");
}
+ case JSON:
+ if (fieldSpec.isSingleValueField()) {
+ return PinotDataType.JSON;
+ } else {
+ throw new IllegalStateException("There is no multi-value type for JSON");
+ }
case STRING:
return fieldSpec.isSingleValueField() ? PinotDataType.STRING : PinotDataType.STRING_ARRAY;
case BYTES:
@@ -1045,6 +1125,8 @@ public enum PinotDataType {
return TIMESTAMP;
case STRING:
return STRING;
+ case JSON:
+ return JSON;
case BYTES:
return BYTES;
case INT_ARRAY:
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
index aa7d31e..a020ebc 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/data/FieldSpecTest.java
@@ -58,6 +58,7 @@ public class FieldSpecTest {
Assert.assertEquals(BOOLEAN.getStoredType(), INT);
Assert.assertEquals(TIMESTAMP.getStoredType(), LONG);
Assert.assertEquals(STRING.getStoredType(), STRING);
+ Assert.assertEquals(JSON.getStoredType(), STRING);
Assert.assertEquals(BYTES.getStoredType(), BYTES);
Assert.assertEquals(INT.size(), Integer.BYTES);
@@ -106,6 +107,17 @@ public class FieldSpecTest {
Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode());
Assert.assertEquals(fieldSpec1.getDefaultNullValue(), "null");
+ // Single-value json type dimension field with max length and default null value.
+ fieldSpec1 = new DimensionFieldSpec();
+ fieldSpec1.setName("svDimension");
+ fieldSpec1.setDataType(JSON);
+ fieldSpec1.setMaxLength(20000);
+ fieldSpec2 = new DimensionFieldSpec("svDimension", JSON, true, 20000, null);
+ Assert.assertEquals(fieldSpec1, fieldSpec2);
+ Assert.assertEquals(fieldSpec1.toString(), fieldSpec2.toString());
+ Assert.assertEquals(fieldSpec1.hashCode(), fieldSpec2.hashCode());
+ Assert.assertEquals(fieldSpec1.getDefaultNullValue(), "null");
+
// Multi-value dimension field.
fieldSpec1 = new DimensionFieldSpec();
fieldSpec1.setName("mvDimension");
diff --git a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java
index ca75097..2970b4b 100644
--- a/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java
+++ b/pinot-common/src/test/java/org/apache/pinot/common/utils/PinotDataTypeTest.java
@@ -18,6 +18,11 @@
*/
package org.apache.pinot.common.utils;
+import java.sql.Timestamp;
+import java.time.LocalDateTime;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
import org.testng.annotations.Test;
import static org.apache.pinot.common.utils.PinotDataType.*;
@@ -28,16 +33,16 @@ import static org.testng.Assert.fail;
public class PinotDataTypeTest {
private static final PinotDataType[] SOURCE_TYPES =
- {BYTE, CHARACTER, SHORT, INTEGER, LONG, FLOAT, DOUBLE, STRING, BYTE_ARRAY, CHARACTER_ARRAY, SHORT_ARRAY, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY, STRING_ARRAY};
+ {BYTE, CHARACTER, SHORT, INTEGER, LONG, FLOAT, DOUBLE, STRING, JSON, BYTE_ARRAY, CHARACTER_ARRAY, SHORT_ARRAY, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY, STRING_ARRAY};
private static final Object[] SOURCE_VALUES =
- {(byte) 123, (char) 123, (short) 123, 123, 123L, 123f, 123d, " 123", new Object[]{(byte) 123}, new Object[]{(char) 123}, new Object[]{(short) 123}, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}, new Object[]{" 123"}};
+ {(byte) 123, (char) 123, (short) 123, 123, 123L, 123f, 123d, " 123", "123 ", new Object[]{(byte) 123}, new Object[]{(char) 123}, new Object[]{(short) 123}, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}, new Object[]{" 123"}};
private static final PinotDataType[] DEST_TYPES =
{INTEGER, LONG, FLOAT, DOUBLE, INTEGER_ARRAY, LONG_ARRAY, FLOAT_ARRAY, DOUBLE_ARRAY};
private static final Object[] EXPECTED_DEST_VALUES =
{123, 123L, 123f, 123d, new Object[]{123}, new Object[]{123L}, new Object[]{123f}, new Object[]{123d}};
private static final String[] EXPECTED_STRING_VALUES =
{Byte.toString((byte) 123), Character.toString((char) 123), Short.toString((short) 123), Integer.toString(
- 123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123", Byte.toString(
+ 123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123", "123 ", Byte.toString(
(byte) 123), Character.toString((char) 123), Short.toString((short) 123), Integer.toString(
123), Long.toString(123L), Float.toString(123f), Double.toString(123d), " 123"};
@@ -76,6 +81,9 @@ public class PinotDataTypeTest {
assertEquals(DOUBLE.convert(false, BOOLEAN), 0d);
assertEquals(STRING.convert(true, BOOLEAN), "true");
assertEquals(STRING.convert(false, BOOLEAN), "false");
+
+ assertEquals(BOOLEAN.convert("true", JSON), true);
+ assertEquals(BOOLEAN.convert("false", JSON), false);
}
@Test
@@ -89,11 +97,30 @@ public class PinotDataTypeTest {
assertEquals(STRING.convert(new byte[]{0, 1}, BYTES), "0001");
assertEquals(BYTES.convert("0001", STRING), new byte[]{0, 1});
assertEquals(BYTES.convert(new byte[]{0, 1}, BYTES), new byte[]{0, 1});
+ assertEquals(BYTES.convert("AAE=", JSON), new byte[]{0,1});
assertEquals(BYTES.convert(new Byte[]{0, 1}, BYTE_ARRAY), new byte[]{0, 1});
assertEquals(BYTES.convert(new String[]{"0001"}, STRING_ARRAY), new byte[]{0, 1});
}
@Test
+ public void testTimestamp() {
+ Timestamp timestamp = Timestamp.valueOf(LocalDateTime.now());
+ assertEquals(TIMESTAMP.convert(timestamp.getTime(), LONG), timestamp);
+ assertEquals(TIMESTAMP.convert(timestamp.toString(), STRING), timestamp);
+ assertEquals(TIMESTAMP.convert(timestamp.getTime(), JSON), timestamp);
+ assertEquals(TIMESTAMP.convert(timestamp.toString(), JSON), timestamp);
+ }
+
+ @Test
+ public void testJSON() {
+ assertEquals(JSON.convert(false, BOOLEAN), "false");
+ assertEquals(JSON.convert(true, BOOLEAN), "true");
+ assertEquals(JSON.convert(new byte[]{0, 1}, BYTES), "\"AAE=\""); // Base64 encoding.
+ assertEquals(JSON.convert("{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}", STRING), "{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}");
+ assertEquals(JSON.convert(new Timestamp(1620324238610l), TIMESTAMP), "1620324238610");
+ }
+
+ @Test
public void testObject() {
assertEquals(OBJECT.toInt(new NumberObject("123")), 123);
assertEquals(OBJECT.toLong(new NumberObject("123")), 123L);
@@ -102,43 +129,64 @@ public class PinotDataTypeTest {
assertTrue(OBJECT.toBoolean(new NumberObject("123")));
assertEquals(OBJECT.toTimestamp(new NumberObject("123")).getTime(), 123L);
assertEquals(OBJECT.toString(new NumberObject("123")), "123");
+ assertEquals(OBJECT.toJson(getGenericTestObject()), "{\"bytes\":\"AAE=\",\"map\":{\"key1\":\"value\",\"key2\":null,\"array\":[-5.4,4,\"2\"]},\"timestamp\":1620324238610}");
assertEquals(OBJECT_ARRAY.getSingleValueType(), OBJECT);
}
+ private static Object getGenericTestObject() {
+ Map<String, Object> map1 = new HashMap<>();
+ map1.put("array", Arrays.asList(-5.4,4, "2"));
+ map1.put("key1", "value");
+ map1.put("key2", null);
+
+ Map<String, Object> map2 = new HashMap<>();
+ map2.put("map", map1);
+ map2.put("bytes", new byte[]{0,1});
+ map2.put("timestamp", new Timestamp(1620324238610l));
+
+ return map2;
+ }
+
@Test
public void testInvalidConversion() {
for (PinotDataType sourceType : values()) {
- if (sourceType.isSingleValue() && sourceType != STRING && sourceType != BYTES) {
- assertInvalidConversion(sourceType, BYTES);
+ if (sourceType.isSingleValue() && sourceType != STRING && sourceType != BYTES && sourceType != JSON) {
+ assertInvalidConversion(null, sourceType, BYTES, UnsupportedOperationException.class);
}
}
- assertInvalidConversion(BYTES, INTEGER);
- assertInvalidConversion(BYTES, LONG);
- assertInvalidConversion(BYTES, FLOAT);
- assertInvalidConversion(BYTES, DOUBLE);
- assertInvalidConversion(BYTES, INTEGER_ARRAY);
- assertInvalidConversion(BYTES, LONG_ARRAY);
- assertInvalidConversion(BYTES, FLOAT_ARRAY);
- assertInvalidConversion(BYTES, DOUBLE_ARRAY);
+ assertInvalidConversion(null, BYTES, INTEGER, UnsupportedOperationException.class);
+ assertInvalidConversion(null, BYTES, LONG, UnsupportedOperationException.class);
+ assertInvalidConversion(null, BYTES, FLOAT, UnsupportedOperationException.class);
+ assertInvalidConversion(null, BYTES, DOUBLE, UnsupportedOperationException.class);
+ assertInvalidConversion(null, BYTES, INTEGER_ARRAY, UnsupportedOperationException.class);
+ assertInvalidConversion(null, BYTES, LONG_ARRAY, UnsupportedOperationException.class);
+ assertInvalidConversion(null, BYTES, FLOAT_ARRAY, UnsupportedOperationException.class);
+ assertInvalidConversion(null, BYTES, DOUBLE_ARRAY, UnsupportedOperationException.class);
for (PinotDataType sourceType : values()) {
- assertInvalidConversion(sourceType, BYTE);
- assertInvalidConversion(sourceType, CHARACTER);
- assertInvalidConversion(sourceType, SHORT);
- assertInvalidConversion(sourceType, OBJECT);
- assertInvalidConversion(sourceType, BYTE_ARRAY);
- assertInvalidConversion(sourceType, CHARACTER_ARRAY);
- assertInvalidConversion(sourceType, SHORT_ARRAY);
- assertInvalidConversion(sourceType, OBJECT_ARRAY);
+ assertInvalidConversion(null, sourceType, BYTE, UnsupportedOperationException.class);
+ assertInvalidConversion(null, sourceType, CHARACTER, UnsupportedOperationException.class);
+ assertInvalidConversion(null, sourceType, SHORT, UnsupportedOperationException.class);
+ assertInvalidConversion(null, sourceType, OBJECT, UnsupportedOperationException.class);
+ assertInvalidConversion(null, sourceType, BYTE_ARRAY, UnsupportedOperationException.class);
+ assertInvalidConversion(null, sourceType, CHARACTER_ARRAY, UnsupportedOperationException.class);
+ assertInvalidConversion(null, sourceType, SHORT_ARRAY, UnsupportedOperationException.class);
+ assertInvalidConversion(null, sourceType, OBJECT_ARRAY, UnsupportedOperationException.class);
}
+
+ assertInvalidConversion("xyz", STRING, JSON, RuntimeException.class);
+
}
- private void assertInvalidConversion(PinotDataType sourceType, PinotDataType destType) {
+ private void assertInvalidConversion(Object value, PinotDataType sourceType, PinotDataType destType,
+ Class expectedExceptionType) {
try {
- destType.convert(null, sourceType);
- } catch (UnsupportedOperationException e) {
- return;
+ destType.convert(value, sourceType);
+ } catch (Exception e) {
+ if (e.getClass().equals(expectedExceptionType)) {
+ return;
+ }
}
fail();
}
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java
index b504202..66e9111 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/BaseTransformFunction.java
@@ -47,6 +47,8 @@ public abstract class BaseTransformFunction implements TransformFunction {
new TransformResultMetadata(DataType.STRING, true, false);
protected static final TransformResultMetadata STRING_MV_NO_DICTIONARY_METADATA =
new TransformResultMetadata(DataType.STRING, false, false);
+ protected static final TransformResultMetadata JSON_SV_NO_DICTIONARY_METADATA =
+ new TransformResultMetadata(DataType.JSON, true, false);
protected static final TransformResultMetadata BYTES_SV_NO_DICTIONARY_METADATA =
new TransformResultMetadata(DataType.BYTES, true, false);
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java
index 7d7f96c..8428249 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/operator/transform/function/CastTransformFunction.java
@@ -76,6 +76,9 @@ public class CastTransformFunction extends BaseTransformFunction {
case "VARCHAR":
_resultMetadata = STRING_SV_NO_DICTIONARY_METADATA;
break;
+ case "JSON":
+ _resultMetadata = JSON_SV_NO_DICTIONARY_METADATA;
+ break;
default:
throw new IllegalArgumentException("Unable to cast expression to type - " + targetType);
}
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java
similarity index 84%
copy from pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
copy to pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java
index c85fe86..75a0537 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonDatatypeTest.java
@@ -27,6 +27,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.FileUtils;
+import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.core.common.Operator;
import org.apache.pinot.core.operator.blocks.IntermediateResultsBlock;
import org.apache.pinot.segment.local.indexsegment.immutable.ImmutableSegmentLoader;
@@ -50,9 +51,9 @@ import org.testng.annotations.Test;
/**
- * Test cases verifying evaluation of predicate with expressions that contain numerical values of different types.
+ * Test cases verifying query evaluation against column of type JSON.
*/
-public class JsonMatchPredicateTest extends BaseQueriesTest {
+public class JsonDatatypeTest extends BaseQueriesTest {
private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "JsonMatchPredicateTest");
private static final String RAW_TABLE_NAME = "testTable";
private static final String SEGMENT_NAME = "testSegment";
@@ -63,7 +64,7 @@ public class JsonMatchPredicateTest extends BaseQueriesTest {
private static final String STRING_COLUMN = "stringColumn";
private static final Schema SCHEMA =
new Schema.SchemaBuilder().addSingleValueDimension(INT_COLUMN, FieldSpec.DataType.INT)
- .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(JSON_COLUMN, FieldSpec.DataType.JSON)
.addSingleValueDimension(STRING_COLUMN, FieldSpec.DataType.STRING).build();
private static final TableConfig TABLE_CONFIG =
new TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME).build();
@@ -143,18 +144,34 @@ public class JsonMatchPredicateTest extends BaseQueriesTest {
_indexSegments = Arrays.asList(immutableSegment, immutableSegment);
}
+ /** Verify result column type of a simple select query against JSON column */
+ @Test
+ public void testSimpleSelectOnJsonColumn() {
+ try {
+ Operator operator = getOperatorForSqlQuery("select jsonColumn FROM testTable");
+ IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock();
+ Collection<Object[]> rows = block.getSelectionResult();
+ Assert.assertEquals(rows.size(), 9);
+ Assert.assertEquals(block.getDataSchema().getColumnDataType(0), DataSchema.ColumnDataType.JSON);
+ } catch (IllegalStateException ise) {
+ Assert.assertTrue(true);
+ }
+ }
+
/** Test filtering on string value associated with JSON key*/
@Test
public void testExtractScalarWithStringFilter() {
Operator operator = getOperatorForSqlQuery(
- "select json_extract_scalar(jsonColumn, '$.name.last', 'STRING') FROM testTable WHERE json_extract_scalar(jsonColumn, '$.name.first', 'STRING') = 'daffy'");
+ "select intColumn, json_extract_scalar(jsonColumn, '$.name.last', 'STRING') FROM testTable WHERE json_extract_scalar(jsonColumn, '$.name.first', 'STRING') = 'daffy'");
IntermediateResultsBlock block = (IntermediateResultsBlock) operator.nextBlock();
Collection<Object[]> rows = block.getSelectionResult();
Assert.assertEquals(rows.size(), 1);
Iterator<Object[]> iterator = rows.iterator();
Assert.assertTrue(iterator.hasNext());
- Assert.assertEquals(iterator.next()[0], "duck");
+ Object[] row = iterator.next();
+ Assert.assertEquals(row[0], 1);
+ Assert.assertEquals(row[1], "duck");
}
/** Test filtering on number value associated with JSON key*/
@@ -282,37 +299,6 @@ public class JsonMatchPredicateTest extends BaseQueriesTest {
Assert.assertEquals(iterator.next()[0], "goofy");
}
- /** Evaluate json_extract_scalar over string column that does not contain valid json data. */
- @Test
- public void testJsonExtractScalarAgainstInvalidJson() {
- // json_extract_scalar throws exception since we are trying to parse a non-JSON string.
- Operator operator1 = getOperatorForSqlQuery(
- "select count(*) FROM testTable WHERE json_extract_scalar(stringColumn, '$.name.first', 'INT') = 0");
- try {
- IntermediateResultsBlock block1 = (IntermediateResultsBlock) operator1.nextBlock();
- Assert.fail("Expected query to fail with Exception.");
- } catch (RuntimeException re) {
- // JSON parsing exception expected.
- }
-
- // JSON data is stored in columns of type STRING, so there is nothing preventing the column from storing bad json
- // string. Bad JSON string in columns will cause json_extract_scalar to throw an exception which would terminate
- // query processing. However, when json_extract_scalar is used within the WHERE clause, we should return the
- // default value instead of throwing exception. This will allow the predicate to be evaluated to either true or
- // false and hence allow the query to complete successfully. Returning default value from json_extract_scalar is
- // an undocumented feature. Ideally, json_extract_scalar should return NULL when it encounters bad JSON. However,
- // NULL support is currently pending, so this is the best we can do.
- Operator operator2 = getOperatorForSqlQuery(
- "select count(*) FROM testTable WHERE json_extract_scalar(stringColumn, '$.name.first', 'INT', 0) = 0");
-
- IntermediateResultsBlock block2 = (IntermediateResultsBlock) operator2.nextBlock();
- Collection<Object[]> rows = block2.getSelectionResult();
-
- // None of the values in stringColumn are valid JSON. Hence, json_extract_scalar should default to '0' for all rows
- // and count returned by the query should be 9 (same as number of rows in the table).
- Assert.assertEquals(block2.getAggregationResult().get(0), 9L);
- }
-
@AfterClass
public void tearDown()
throws IOException {
diff --git a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
index c85fe86..9325a2c 100644
--- a/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/queries/JsonMatchPredicateTest.java
@@ -51,6 +51,9 @@ import org.testng.annotations.Test;
/**
* Test cases verifying evaluation of predicate with expressions that contain numerical values of different types.
+ * TODO: Update these test cases to: 1) use V2 JSON_MATCH function, 2) use multi-dimensional JSON array addressing,
+ * 3) do json_extract_scalar on a column other than the JSON_MATCH column, 4) query deeper levels of nesting, and
+ * 5) add test cases for GROUP BY on json_extract_scalar or path expressions.
*/
public class JsonMatchPredicateTest extends BaseQueriesTest {
private static final File INDEX_DIR = new File(FileUtils.getTempDirectory(), "JsonMatchPredicateTest");
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
index d3bca59..8549893 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/index/loader/columnminmaxvalue/ColumnMinMaxValueGenerator.java
@@ -64,7 +64,7 @@ public class ColumnMinMaxValueGenerator {
Set<String> columnsToAddMinMaxValue = new HashSet<>(schema.getPhysicalColumnNames());
// mode ALL - use all columns
- // mode NON_METRIC - use all dimensions and time columns
+ // mode NON_METRIC - use all dimensions and time columns
// mode TIME - use only time columns
switch (_columnMinMaxValueGeneratorMode) {
case TIME:
@@ -89,7 +89,7 @@ public class ColumnMinMaxValueGenerator {
}
PinotDataBuffer dictionaryBuffer = _segmentWriter.getIndexFor(columnName, ColumnIndexType.DICTIONARY);
- DataType dataType = columnMetadata.getDataType();
+ DataType dataType = columnMetadata.getDataType().getStoredType();
int length = columnMetadata.getCardinality();
switch (dataType) {
case INT:
@@ -146,8 +146,8 @@ public class ColumnMinMaxValueGenerator {
private void saveMetadata()
throws Exception {
if (_minMaxValueAdded) {
- // Commons Configuration 1.10 does not support file path containing '%'.
- // Explicitly providing the output stream for the file bypasses the problem.
+ // Commons Configuration 1.10 does not support file path containing '%'.
+ // Explicitly providing the output stream for the file bypasses the problem.
try (FileOutputStream fileOutputStream = new FileOutputStream(_segmentProperties.getFile())) {
_segmentProperties.save(fileOutputStream);
}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
index 0b5d7ee..eb8b031 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
@@ -43,6 +43,7 @@ public class RecordTransformerTest {
.addSingleValueDimension("svFloat", DataType.FLOAT).addSingleValueDimension("svDouble", DataType.DOUBLE)
.addSingleValueDimension("svBoolean", DataType.BOOLEAN).addSingleValueDimension("svTimestamp", DataType.TIMESTAMP)
.addSingleValueDimension("svBytes", DataType.BYTES).addMultiValueDimension("mvInt", DataType.INT)
+ .addSingleValueDimension("svJson", DataType.JSON)
.addMultiValueDimension("mvLong", DataType.LONG).addMultiValueDimension("mvFloat", DataType.FLOAT)
.addMultiValueDimension("mvDouble", DataType.DOUBLE)
// For sanitation
@@ -70,6 +71,7 @@ public class RecordTransformerTest {
record.putValue("svBoolean", "true");
record.putValue("svTimestamp", "2020-02-02 22:22:22.222");
record.putValue("svBytes", "7b7b"/*new byte[]{123, 123}*/);
+ record.putValue("svJson", "{\"first\": \"daffy\", \"last\": \"duck\"}");
record.putValue("mvInt", new Object[]{123L});
record.putValue("mvLong", Collections.singletonList(123f));
record.putValue("mvFloat", new Double[]{123d});
@@ -141,6 +143,7 @@ public class RecordTransformerTest {
assertEquals(record.getValue("svBoolean"), 1);
assertEquals(record.getValue("svTimestamp"), Timestamp.valueOf("2020-02-02 22:22:22.222").getTime());
assertEquals(record.getValue("svBytes"), new byte[]{123, 123});
+ assertEquals(record.getValue("svJson"), "{\"first\":\"daffy\",\"last\":\"duck\"}");
assertEquals(record.getValue("mvInt"), new Object[]{123});
assertEquals(record.getValue("mvLong"), new Object[]{123L});
assertEquals(record.getValue("mvFloat"), new Object[]{123f});
@@ -190,6 +193,7 @@ public class RecordTransformerTest {
assertEquals(record.getValue("svBoolean"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN);
assertEquals(record.getValue("svTimestamp"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP);
assertEquals(record.getValue("svBytes"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES);
+ assertEquals(record.getValue("svJson"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON);
assertEquals(record.getValue("mvInt"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT});
assertEquals(record.getValue("mvLong"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG});
assertEquals(record.getValue("mvFloat"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT});
@@ -210,6 +214,7 @@ public class RecordTransformerTest {
assertTrue(record.isNullValue("svBoolean"));
assertTrue(record.isNullValue("svTimestamp"));
assertTrue(record.isNullValue("svBytes"));
+ assertTrue(record.isNullValue("svJson"));
assertTrue(record.isNullValue("mvInt"));
assertTrue(record.isNullValue("mvLong"));
assertTrue(record.isNullValue("mvDouble"));
@@ -234,6 +239,7 @@ public class RecordTransformerTest {
assertEquals(record.getValue("svDouble"), 123d);
assertEquals(record.getValue("svBoolean"), 1);
assertEquals(record.getValue("svTimestamp"), Timestamp.valueOf("2020-02-02 22:22:22.222").getTime());
+ assertEquals(record.getValue("svJson"),"{\"first\":\"daffy\",\"last\":\"duck\"}");
assertEquals(record.getValue("svBytes"), new byte[]{123, 123});
assertEquals(record.getValue("mvInt"), new Object[]{123});
assertEquals(record.getValue("mvLong"), new Object[]{123L});
@@ -259,6 +265,7 @@ public class RecordTransformerTest {
assertEquals(record.getValue("svBoolean"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN);
assertEquals(record.getValue("svTimestamp"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP);
assertEquals(record.getValue("svBytes"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES);
+ assertEquals(record.getValue("svJson"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_JSON);
assertEquals(record.getValue("mvInt"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_INT});
assertEquals(record.getValue("mvLong"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_LONG});
assertEquals(record.getValue("mvFloat"), new Object[]{FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT});
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
index 46196ff..f77facb 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/FieldSpec.java
@@ -52,6 +52,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
public static final Integer DEFAULT_DIMENSION_NULL_VALUE_OF_BOOLEAN = 0;
public static final Long DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP = 0L;
public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_STRING = "null";
+ public static final String DEFAULT_DIMENSION_NULL_VALUE_OF_JSON = "null";
public static final byte[] DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES = new byte[0];
public static final Integer DEFAULT_METRIC_NULL_VALUE_OF_INT = 0;
@@ -229,6 +230,8 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
return DEFAULT_DIMENSION_NULL_VALUE_OF_TIMESTAMP;
case STRING:
return DEFAULT_DIMENSION_NULL_VALUE_OF_STRING;
+ case JSON:
+ return DEFAULT_DIMENSION_NULL_VALUE_OF_JSON;
case BYTES:
return DEFAULT_DIMENSION_NULL_VALUE_OF_BYTES;
default:
@@ -303,6 +306,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
jsonNode.put(key, new Timestamp((Long) _defaultNullValue).toString());
break;
case STRING:
+ case JSON:
jsonNode.put(key, (String) _defaultNullValue);
break;
case BYTES:
@@ -375,6 +379,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
BOOLEAN /* Stored as INT */,
TIMESTAMP /* Stored as LONG */,
STRING,
+ JSON /* Stored as STRING */,
BYTES,
STRUCT,
MAP,
@@ -392,6 +397,8 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
return INT;
case TIMESTAMP:
return LONG;
+ case JSON:
+ return STRING;
default:
return this;
}
@@ -451,6 +458,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
case TIMESTAMP:
return TimestampUtils.toMillisSinceEpoch(value);
case STRING:
+ case JSON:
return value;
case BYTES:
return BytesUtils.toBytes(value);
@@ -481,6 +489,7 @@ public abstract class FieldSpec implements Comparable<FieldSpec>, Serializable {
case TIMESTAMP:
return TimestampUtils.toMillisSinceEpoch(value);
case STRING:
+ case JSON:
return value;
case BYTES:
return BytesUtils.toByteArray(value);
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
index ebbce81..40b55d1 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/Schema.java
@@ -443,6 +443,7 @@ public final class Schema implements Serializable {
case BOOLEAN:
case TIMESTAMP:
case STRING:
+ case JSON:
case BYTES:
break;
default:
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
index 4077a53..a3e16a0 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/JsonUtils.java
@@ -193,6 +193,7 @@ public class JsonUtils {
return jsonValue.asBoolean();
case TIMESTAMP:
case STRING:
+ case JSON:
return jsonValue.asText();
case BYTES:
try {
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org