You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by xi...@apache.org on 2023/07/25 04:44:13 UTC
[pinot] branch master updated: Support sum precision (#11162)
This is an automated email from the ASF dual-hosted git repository.
xiangfu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new a7530255d4 Support sum precision (#11162)
a7530255d4 is described below
commit a7530255d4e1dfa40eb591acfbf174bc61f0e056
Author: Xiang Fu <xi...@gmail.com>
AuthorDate: Mon Jul 24 21:44:06 2023 -0700
Support sum precision (#11162)
---
.../tests/SumPrecisionIntegrationTest.java | 167 +++++++++++++++++++++
.../src/test/resources/queries/UDFAggregates.json | 4 +
.../pinot/segment/spi/AggregationFunctionType.java | 3 +-
3 files changed, 173 insertions(+), 1 deletion(-)
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SumPrecisionIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SumPrecisionIntegrationTest.java
new file mode 100644
index 0000000000..8b8a8ab41f
--- /dev/null
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/SumPrecisionIntegrationTest.java
@@ -0,0 +1,167 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.integration.tests;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.google.common.collect.ImmutableList;
+import java.io.File;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.nio.ByteBuffer;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.avro.Schema.Field;
+import org.apache.avro.Schema.Type;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.RandomUtils;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.utils.BigDecimalUtils;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.apache.pinot.util.TestUtils;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Integration test for the {@code SUMPRECISION} aggregation function.
+ *
+ * <p>Loads an offline table whose metric columns all carry the same values ({@code BASE_VALUE} plus the row index),
+ * ingested as BIG_DECIMAL (from bytes and from string), DOUBLE and LONG, then checks that {@code SUMPRECISION} over
+ * the BIG_DECIMAL columns and {@code sum} over the DOUBLE and LONG columns all yield the same total.
+ */
+public class SumPrecisionIntegrationTest extends BaseClusterIntegrationTest {
+  // Column names shared by the Pinot schema, the Avro schema and the test query.
+  private static final String DIM_NAME = "dimName";
+  private static final String MET_BIG_DECIMAL_BYTES = "metBigDecimalBytes";
+  private static final String MET_BIG_DECIMAL_STRING = "metBigDecimalString";
+  private static final String MET_DOUBLE = "metDouble";
+  private static final String MET_LONG = "metLong";
+
+  // Metric value of the first record: one past Integer.MAX_VALUE. Record i stores BASE_VALUE + i.
+  private static final long BASE_VALUE = Integer.MAX_VALUE + 1L;
+  // Number of aggregation expressions selected by the query in testQueries.
+  private static final int NUM_AGGREGATIONS = 4;
+
+  /**
+   * Starts the Pinot cluster, registers the schema and the offline table config, then builds and uploads a segment
+   * generated by {@link #createAvroFile(long)} and blocks in {@code waitForAllDocsLoaded(60_000)}.
+   *
+   * @throws Exception if any cluster, schema, table or segment step fails
+   */
+  @BeforeClass
+  public void setup()
+      throws Exception {
+    TestUtils.ensureDirectoriesExistAndEmpty(_tempDir, _segmentDir, _tarDir);
+
+    // Start the Pinot cluster
+    startZk();
+    startController();
+    startBroker();
+    startServer();
+
+    // Create & upload schema AND table config. Both BIG_DECIMAL metrics share the same Pinot type; they differ only
+    // in how the Avro input encodes them (bytes vs. string, see createAvroFile).
+    Schema schema = new Schema.SchemaBuilder().setSchemaName(DEFAULT_SCHEMA_NAME)
+        .addSingleValueDimension(DIM_NAME, FieldSpec.DataType.STRING)
+        .addMetric(MET_BIG_DECIMAL_BYTES, FieldSpec.DataType.BIG_DECIMAL)
+        .addMetric(MET_BIG_DECIMAL_STRING, FieldSpec.DataType.BIG_DECIMAL)
+        .addMetric(MET_DOUBLE, FieldSpec.DataType.DOUBLE)
+        .addMetric(MET_LONG, FieldSpec.DataType.LONG).build();
+    addSchema(schema);
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(DEFAULT_TABLE_NAME).build();
+    addTableConfig(tableConfig);
+
+    // Create & upload segments
+    File avroFile = createAvroFile(getCountStarResult());
+    ClusterIntegrationTestUtils.buildSegmentFromAvro(avroFile, tableConfig, schema, 0, _segmentDir, _tarDir);
+    uploadSegments(DEFAULT_TABLE_NAME, _tarDir);
+
+    waitForAllDocsLoaded(60_000);
+  }
+
+  /**
+   * Number of records generated for the table; also drives the expected total in {@link #testQueries(boolean)}.
+   */
+  @Override
+  protected long getCountStarResult() {
+    return 1000;
+  }
+
+  /**
+   * Runs {@code SUMPRECISION} over both BIG_DECIMAL columns and {@code sum} over the DOUBLE and LONG columns, and
+   * asserts that each of the four results parses to the expected total.
+   *
+   * @param useMultiStageQueryEngine value passed to {@code setUseMultiStageQueryEngine} before querying
+   * @throws Exception if posting the query fails
+   */
+  @Test(dataProvider = "useBothQueryEngines")
+  public void testQueries(boolean useMultiStageQueryEngine)
+      throws Exception {
+    setUseMultiStageQueryEngine(useMultiStageQueryEngine);
+    String query =
+        String.format("SELECT SUMPRECISION(%s), SUMPRECISION(%s), sum(%s), sum(%s) FROM %s",
+            MET_BIG_DECIMAL_BYTES, MET_BIG_DECIMAL_STRING, MET_DOUBLE, MET_LONG, DEFAULT_TABLE_NAME);
+
+    // Record i holds BASE_VALUE + i, so the total is n * BASE_VALUE + (0 + 1 + ... + (n - 1)).
+    // For n = 1000 this is 2147484147500, which is exactly representable as a double.
+    long numRecords = getCountStarResult();
+    double expectedSum = numRecords * BASE_VALUE + numRecords * (numRecords - 1) / 2;
+
+    JsonNode response = postQuery(query);
+    JsonNode row = response.get("resultTable").get("rows").get(0);
+    // The full response goes into the failure messages rather than being printed on every run.
+    assertEquals(row.size(), NUM_AGGREGATIONS, "Unexpected number of result columns: " + response);
+    for (int i = 0; i < NUM_AGGREGATIONS; i++) {
+      assertEquals(Double.parseDouble(row.get(i).asText()), expectedSum,
+          "Mismatch in result column " + i + ": " + response);
+    }
+  }
+
+  /**
+   * Posts {@code query}, collects every result row as (column 0 text -> column 1 int value) and compares the
+   * resulting map with {@code expectedGroupToValueMap}.
+   *
+   * <p>Not referenced by any test in this class at present.
+   *
+   * @param query SQL query whose rows have a group key in column 0 and an integer value in column 1
+   * @param expectedGroupToValueMap expected group-to-value mapping
+   * @throws Exception if posting the query fails
+   */
+  private void runAndAssert(String query, Map<String, Integer> expectedGroupToValueMap)
+      throws Exception {
+    Map<String, Integer> actualGroupToValueMap = new HashMap<>();
+    JsonNode jsonNode = postQuery(query);
+    jsonNode.get("resultTable").get("rows").forEach(node -> {
+      String group = node.get(0).textValue();
+      int value = node.get(1).intValue();
+      actualGroupToValueMap.put(group, value);
+    });
+    assertEquals(actualGroupToValueMap, expectedGroupToValueMap);
+  }
+
+  /**
+   * Writes {@code totalNumRecords} Avro records to {@code data.avro} under the temp directory.
+   *
+   * <p>Record {@code i} stores {@code BASE_VALUE + i} in every metric column (serialized BigDecimal bytes, plain
+   * string, double and long) together with a random dimension value of the form {@code dimN}.
+   *
+   * @param totalNumRecords number of records to generate
+   * @return the Avro file that was written
+   * @throws IOException if the file cannot be created or written
+   */
+  private File createAvroFile(long totalNumRecords)
+      throws IOException {
+    // create avro schema
+    org.apache.avro.Schema avroSchema = org.apache.avro.Schema.createRecord("myRecord", null, null, false);
+    avroSchema.setFields(ImmutableList.of(
+        new Field(DIM_NAME, org.apache.avro.Schema.create(Type.STRING), null, null),
+        new Field(MET_BIG_DECIMAL_BYTES, org.apache.avro.Schema.create(Type.BYTES), null, null),
+        new Field(MET_BIG_DECIMAL_STRING, org.apache.avro.Schema.create(Type.STRING), null, null),
+        new Field(MET_DOUBLE, org.apache.avro.Schema.create(Type.DOUBLE), null, null),
+        new Field(MET_LONG, org.apache.avro.Schema.create(Type.LONG), null, null)));
+
+    // create avro file
+    File avroFile = new File(_tempDir, "data.avro");
+    try (DataFileWriter<GenericData.Record> fileWriter = new DataFileWriter<>(new GenericDatumWriter<>(avroSchema))) {
+      fileWriter.create(avroSchema, avroFile);
+      int dimCardinality = 50;
+      BigDecimal bigDecimalBase = BigDecimal.valueOf(BASE_VALUE);
+      for (int i = 0; i < totalNumRecords; i++) {
+        // create avro record
+        GenericData.Record record = new GenericData.Record(avroSchema);
+        // RandomUtils.nextInt() is documented as non-negative, so the remainder stays in [0, dimCardinality).
+        record.put(DIM_NAME, "dim" + (RandomUtils.nextInt() % dimCardinality));
+        BigDecimal bigDecimalValue = bigDecimalBase.add(BigDecimal.valueOf(i));
+
+        // The same value is written in four encodings so the four aggregations can be compared directly.
+        record.put(MET_BIG_DECIMAL_BYTES, ByteBuffer.wrap(BigDecimalUtils.serialize(bigDecimalValue)));
+        record.put(MET_BIG_DECIMAL_STRING, bigDecimalValue.toPlainString());
+        record.put(MET_DOUBLE, bigDecimalValue.doubleValue());
+        record.put(MET_LONG, bigDecimalValue.longValue());
+
+        // add avro record to file
+        fileWriter.append(record);
+      }
+    }
+    return avroFile;
+  }
+
+  /**
+   * Drops the offline table, stops the cluster components in the reverse of their start order and deletes the temp
+   * directory.
+   *
+   * @throws IOException if dropping the table or deleting the temp directory fails
+   */
+  @AfterClass
+  public void tearDown()
+      throws IOException {
+    dropOfflineTable(DEFAULT_TABLE_NAME);
+
+    stopServer();
+    stopBroker();
+    stopController();
+    stopZk();
+
+    FileUtils.deleteDirectory(_tempDir);
+  }
+}
diff --git a/pinot-query-runtime/src/test/resources/queries/UDFAggregates.json b/pinot-query-runtime/src/test/resources/queries/UDFAggregates.json
index aeb6b5333a..f5a765e9ad 100644
--- a/pinot-query-runtime/src/test/resources/queries/UDFAggregates.json
+++ b/pinot-query-runtime/src/test/resources/queries/UDFAggregates.json
@@ -59,6 +59,10 @@
{
"sql": "SELECT /*+ aggOptions(is_skip_leaf_stage_aggregate='true') */ string_col, PERCENTILE(float_col, 50), PERCENTILE(double_col, 5), PERCENTILE(int_col, 75), PERCENTILE(long_col, 75) FROM {tbl} GROUP BY string_col",
"outputs": [["a", 400, 300, 2, 2], ["b", 100, 1, 100, 100], ["c", 1.5, 1.01, 175, 175]]
+ },
+ {
+ "sql": "SELECT SUMPRECISION(decimal_col) FROM {tbl}",
+ "outputs": [["10000000000100000000110000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 [...]
}
]
}
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
index e603f04f9f..b636ed4f24 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/AggregationFunctionType.java
@@ -61,7 +61,8 @@ public enum AggregationFunctionType {
ReturnTypes.explicit(SqlTypeName.DOUBLE)),
SUM0("$sum0", null, SqlKind.SUM0, SqlFunctionCategory.NUMERIC, OperandTypes.NUMERIC,
ReturnTypes.AGG_SUM_EMPTY_IS_ZERO, ReturnTypes.explicit(SqlTypeName.DOUBLE)),
- SUMPRECISION("sumPrecision"),
+ SUMPRECISION("sumPrecision", null, SqlKind.OTHER_FUNCTION, SqlFunctionCategory.USER_DEFINED_FUNCTION,
+ OperandTypes.ANY, ReturnTypes.explicit(SqlTypeName.DECIMAL), ReturnTypes.explicit(SqlTypeName.OTHER)),
AVG("avg"),
MODE("mode"),
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org