You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by kh...@apache.org on 2024/02/14 06:14:29 UTC
(pinot) branch master updated: Adding bytes string data type integration tests (#12387)
This is an automated email from the ASF dual-hosted git repository.
kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 04dd57924b Adding bytes string data type integration tests (#12387)
04dd57924b is described below
commit 04dd57924b45cf536470c827f4b56584d76df058
Author: Xiang Fu <xi...@gmail.com>
AuthorDate: Tue Feb 13 22:14:22 2024 -0800
Adding bytes string data type integration tests (#12387)
---
.../common/function/scalar/StringFunctions.java | 10 +
.../integration/tests/custom/BytesTypeTest.java | 314 +++++++++++++++++++++
2 files changed, 324 insertions(+)
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
index 5a49314943..22f312a8b8 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/function/scalar/StringFunctions.java
@@ -48,6 +48,7 @@ public class StringFunctions {
private final static Pattern LTRIM = Pattern.compile("^\\s+");
private final static Pattern RTRIM = Pattern.compile("\\s+$");
+
/**
* @see StringUtils#reverse(String)
* @param input
@@ -495,6 +496,15 @@ public class StringFunctions {
return input.getBytes(StandardCharsets.US_ASCII);
}
+ /**
+ * @param input bytes
+ * @return ASCII encoded string
+ */
+ @ScalarFunction
+ public static String fromAscii(byte[] input) {
+ return new String(input, StandardCharsets.US_ASCII);
+ }
+
/**
* @param input UUID as string
* @return bytearray
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/BytesTypeTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/BytesTypeTest.java
new file mode 100644
index 0000000000..8e3f18c30d
--- /dev/null
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/BytesTypeTest.java
@@ -0,0 +1,314 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests.custom;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.util.Base64;
+import java.util.UUID;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.commons.codec.binary.Hex;
+import org.apache.pinot.common.function.scalar.DataTypeConversionFunctions;
+import org.apache.pinot.common.function.scalar.StringFunctions;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+@Test(suiteName = "CustomClusterIntegrationTest")
+public class BytesTypeTest extends CustomDataQueryClusterIntegrationTest {
+
  // Table name; also returned by getTableName().
  protected static final String DEFAULT_TABLE_NAME = "BytesTypeTest";
  // 16-byte value, hex-encoded; stored in the fixed STRING and BYTES columns of every row so the
  // predicate tests can match all documents.
  // NOTE(review): "STRIING" is a typo, but renaming the constant would touch every usage site.
  private static final String FIXED_HEX_STRIING_VALUE = "968a3c6a5eeb42168bae0e895034a26f";

  // Number of Avro records generated, and the expected count(*) result.
  private static final int NUM_TOTAL_DOCS = 1000;
  // Column names: one STRING/BYTES pair per encoding exercised by the tests.
  private static final String HEX_STR = "hexStr";
  private static final String HEX_BYTES = "hexBytes";
  private static final String UUID_STR = "uuidStr";
  private static final String UUID_BYTES = "uuidBytes";
  private static final String UTF8_STR = "utf8Str";
  private static final String UTF8_BYTES = "utf8Bytes";
  private static final String ASCII_STR = "asciiStr";
  private static final String ASCII_BYTES = "asciiBytes";
  private static final String BASE64_STR = "base64Str";
  private static final String BASE64_BYTES = "base64Bytes";
  private static final String RANDOM_STR = "randomStr";
  private static final String RANDOM_BYTES = "randomBytes";
  private static final String FIXED_STRING = "fixedString";
  private static final String FIXED_BYTES = "fixedBytes";
+
  /** Returns the Pinot table name used by this integration test. */
  @Override
  public String getTableName() {
    return DEFAULT_TABLE_NAME;
  }
+
+ public Schema createSchema() {
+ return new Schema.SchemaBuilder().setSchemaName(getTableName())
+ .addSingleValueDimension(HEX_STR, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(HEX_BYTES, FieldSpec.DataType.BYTES)
+ .addSingleValueDimension(UUID_STR, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(UUID_BYTES, FieldSpec.DataType.BYTES)
+ .addSingleValueDimension(UTF8_STR, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(UTF8_BYTES, FieldSpec.DataType.BYTES)
+ .addSingleValueDimension(ASCII_STR, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(ASCII_BYTES, FieldSpec.DataType.BYTES)
+ .addSingleValueDimension(BASE64_STR, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(BASE64_BYTES, FieldSpec.DataType.BYTES)
+ .addSingleValueDimension(RANDOM_STR, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(RANDOM_BYTES, FieldSpec.DataType.BYTES)
+ .addSingleValueDimension(FIXED_STRING, FieldSpec.DataType.STRING)
+ .addSingleValueDimension(FIXED_BYTES, FieldSpec.DataType.BYTES)
+ .build();
+ }
+
  /** Expected count(*) result: one row per generated Avro record. */
  @Override
  protected long getCountStarResult() {
    return NUM_TOTAL_DOCS;
  }
+
+ @Override
+ public File createAvroFile()
+ throws Exception {
+ // create avro schema
+ org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false);
+ avroSchema.setFields(ImmutableList.of(
+ new org.apache.avro.Schema.Field(HEX_STR, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING),
+ null, null),
+ new org.apache.avro.Schema.Field(HEX_BYTES, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES),
+ null, null),
+ new org.apache.avro.Schema.Field(UUID_STR, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING),
+ null, null),
+ new org.apache.avro.Schema.Field(UUID_BYTES, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES),
+ null, null),
+ new org.apache.avro.Schema.Field(UTF8_STR, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING),
+ null, null),
+ new org.apache.avro.Schema.Field(UTF8_BYTES, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES),
+ null, null),
+ new org.apache.avro.Schema.Field(ASCII_STR, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING),
+ null, null),
+ new org.apache.avro.Schema.Field(ASCII_BYTES, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES),
+ null, null),
+ new org.apache.avro.Schema.Field(BASE64_STR, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING),
+ null, null),
+ new org.apache.avro.Schema.Field(BASE64_BYTES, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES),
+ null, null),
+ new org.apache.avro.Schema.Field(RANDOM_STR, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING),
+ null, null),
+ new org.apache.avro.Schema.Field(RANDOM_BYTES, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES),
+ null, null),
+ new org.apache.avro.Schema.Field(FIXED_STRING,
+ org.apache.avro.Schema.create(org.apache.avro.Schema.Type.STRING), null, null),
+ new org.apache.avro.Schema.Field(FIXED_BYTES, org.apache.avro.Schema.create(org.apache.avro.Schema.Type.BYTES),
+ null, null)
+
+ ));
+
+ File avroFile = new File(_tempDir, "data.avro");
+ try (DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>(avroSchema))) {
+ fileWriter.create(avroSchema, avroFile);
+ for (int i = 0; i < NUM_TOTAL_DOCS; i++) {
+ GenericData.Record record = new GenericData.Record(avroSchema);
+ byte[] bytes = newRandomBytes(RANDOM.nextInt(100) * 2 + 2);
+ String hexString = Hex.encodeHexString(bytes);
+ record.put(HEX_STR, hexString);
+ record.put(HEX_BYTES, ByteBuffer.wrap(bytes));
+ UUID uuid = java.util.UUID.randomUUID();
+ record.put(UUID_STR, uuid.toString());
+ record.put(UUID_BYTES, ByteBuffer.wrap(StringFunctions.toUUIDBytes(uuid.toString())));
+ String utf8String = "utf8String" + i;
+ record.put(UTF8_STR, utf8String);
+ record.put(UTF8_BYTES, ByteBuffer.wrap(StringFunctions.toUtf8(utf8String)));
+ String asciiString = "asciiString" + i;
+ record.put(ASCII_STR, asciiString);
+ record.put(ASCII_BYTES, ByteBuffer.wrap(StringFunctions.toAscii(asciiString)));
+ String base64String = newRandomBase64String();
+ record.put(BASE64_STR, base64String);
+ record.put(BASE64_BYTES, ByteBuffer.wrap(StringFunctions.fromBase64(base64String)));
+ byte[] randomBytes = newRandomBytes();
+ record.put(RANDOM_STR, new String(randomBytes));
+ record.put(RANDOM_BYTES, ByteBuffer.wrap(randomBytes));
+ record.put(FIXED_STRING, FIXED_HEX_STRIING_VALUE);
+ record.put(FIXED_BYTES, ByteBuffer.wrap(DataTypeConversionFunctions.hexToBytes(FIXED_HEX_STRIING_VALUE)));
+ fileWriter.append(record);
+ }
+ }
+
+ return avroFile;
+ }
+
+ private static String newRandomBase64String() {
+ byte[] bytes = newRandomBytes(RANDOM.nextInt(100) * 2 + 2);
+ return Base64.getEncoder().encodeToString(bytes);
+ }
+
+ private static byte[] newRandomBytes() {
+ return newRandomBytes(RANDOM.nextInt(100));
+ }
+
+ private static byte[] newRandomBytes(int length) {
+ byte[] bytes = new byte[length];
+ for (int i = 0; i < length; i++) {
+ bytes[i] = (byte) (Math.random() * 256);
+ }
+ return bytes;
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testGetHexagonAddress(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ String query =
+ String.format("Select bytesToHex(%s), %s, hexToBytes(%s), %s from %s", HEX_BYTES, HEX_STR, HEX_STR, HEX_BYTES,
+ getTableName());
+ JsonNode pinotResponse = postQuery(query);
+ JsonNode rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asText(), rows.get(i).get(1).asText());
+ Assert.assertEquals(rows.get(i).get(2).asText(), rows.get(i).get(3).asText());
+ }
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testGetUUID(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ String query =
+ String.format("Select fromUUIDBytes(%s), %s, toUUIDBytes(%s), %s from %s", UUID_BYTES, UUID_STR, UUID_STR,
+ UUID_BYTES,
+ getTableName());
+ JsonNode pinotResponse = postQuery(query);
+ JsonNode rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asText(), rows.get(i).get(1).asText());
+ Assert.assertEquals(rows.get(i).get(2).asText(), rows.get(i).get(3).asText());
+ }
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testGetUTF8(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ String query =
+ String.format("Select fromUtf8(%s), %s, toUtf8(%s), %s from %s", UTF8_BYTES, UTF8_STR, UTF8_STR, UTF8_BYTES,
+ getTableName());
+ JsonNode pinotResponse = postQuery(query);
+ JsonNode rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asText(), rows.get(i).get(1).asText());
+ Assert.assertEquals(rows.get(i).get(2).asText(), rows.get(i).get(3).asText());
+ }
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testGetASCII(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ String query =
+ String.format("Select fromAscii(%s), %s, toAscii(%s), %s from %s", ASCII_BYTES, ASCII_STR, ASCII_STR,
+ ASCII_BYTES,
+ getTableName());
+ JsonNode pinotResponse = postQuery(query);
+ JsonNode rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asText(), rows.get(i).get(1).asText());
+ Assert.assertEquals(rows.get(i).get(2).asText(), rows.get(i).get(3).asText());
+ }
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testGetBase64(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ String query =
+ String.format("Select toBase64(%s), %s, fromBase64(%s), %s from %s", BASE64_BYTES, BASE64_STR, BASE64_STR,
+ BASE64_BYTES,
+ getTableName());
+ JsonNode pinotResponse = postQuery(query);
+ JsonNode rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asText(), rows.get(i).get(1).asText());
+ Assert.assertEquals(rows.get(i).get(2).asText(), rows.get(i).get(3).asText());
+ }
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testGetRandomBytes(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+ String query =
+ String.format("Select fromBytes(%s, 'UTF-8'), %s, toBytes(%s, 'UTF-8'), %s from %s", UTF8_BYTES, UTF8_STR,
+ UTF8_STR, UTF8_BYTES, getTableName());
+ JsonNode pinotResponse = postQuery(query);
+ JsonNode rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asText(), rows.get(i).get(1).asText());
+ Assert.assertEquals(rows.get(i).get(2).asText(), rows.get(i).get(3).asText());
+ }
+
+ query =
+ String.format("Select fromBytes(%s, 'ASCII'), %s, toBytes(%s, 'ASCII'), %s from %s", ASCII_BYTES, ASCII_STR,
+ ASCII_STR, ASCII_BYTES, getTableName());
+ pinotResponse = postQuery(query);
+ rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asText(), rows.get(i).get(1).asText());
+ Assert.assertEquals(rows.get(i).get(2).asText(), rows.get(i).get(3).asText());
+ }
+ }
+
+ @Test(dataProvider = "useBothQueryEngines")
+ public void testStringAndBytesInPredicate(boolean useMultiStageQueryEngine)
+ throws Exception {
+ setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+
+ // String predicate
+ String query =
+ String.format("Select count(*) from %s WHERE %s = '%s'", getTableName(), FIXED_STRING, FIXED_HEX_STRIING_VALUE);
+ JsonNode pinotResponse = postQuery(query);
+ JsonNode rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asLong(), NUM_TOTAL_DOCS);
+ }
+
+ // Bytes predicate, convert literal string to bytes
+ query =
+ String.format("Select count(*) from %s WHERE %s = hexToBytes('%s')", getTableName(), FIXED_BYTES,
+ FIXED_HEX_STRIING_VALUE);
+ pinotResponse = postQuery(query);
+ rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asLong(), NUM_TOTAL_DOCS);
+ }
+
+ // Bytes predicate, convert column to hex string to compare with a literal string
+ query =
+ String.format("Select count(*) from %s WHERE bytesToHex(%s) = '%s'", getTableName(), FIXED_BYTES,
+ FIXED_HEX_STRIING_VALUE);
+ pinotResponse = postQuery(query);
+ rows = pinotResponse.get("resultTable").get("rows");
+ for (int i = 0; i < rows.size(); i++) {
+ Assert.assertEquals(rows.get(i).get(0).asLong(), NUM_TOTAL_DOCS);
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org