You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by sn...@apache.org on 2024/02/06 16:57:39 UTC
(pinot) branch master updated: Add Helper Functions in StarTreeBuilderUtils and StarTreeV2BuilderConfig (#12361)
This is an automated email from the ASF dual-hosted git repository.
snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new cad8c56d9d Add Helper Functions in StarTreeBuilderUtils and StarTreeV2BuilderConfig (#12361)
cad8c56d9d is described below
commit cad8c56d9d05d0b8e19a580f8652b6b2fb392fda
Author: aishikbh <ai...@startree.ai>
AuthorDate: Tue Feb 6 22:27:32 2024 +0530
Add Helper Functions in StarTreeBuilderUtils and StarTreeV2BuilderConfig (#12361)
* changed some stuff
* fixed an issue for generating default builder configs
* optimize passing segment metadata
* Add helper function and unit test
* Added helper function to compare 2 StarTree builder configs.
* Added unit test for generateDefaultConfig from JsonNode segment metadata.
* add additional unit tests.
* addressing comments
---
.../local/startree/StarTreeBuilderUtils.java | 42 ++++
.../v2/builder/StarTreeV2BuilderConfig.java | 77 ++++++++
.../v2/builder/StarTreeBuilderUtilsTest.java | 218 +++++++++++++++++++++
.../v2/builder/StarTreeV2BuilderConfigTest.java | 50 +++++
4 files changed, 387 insertions(+)
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java
index 59625678eb..e45eb0526d 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.segment.local.startree;
+import com.fasterxml.jackson.databind.JsonNode;
import java.io.File;
import java.io.IOException;
import java.nio.ByteOrder;
@@ -37,6 +38,7 @@ import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
import org.apache.pinot.segment.spi.utils.SegmentMetadataUtils;
import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
+import org.apache.pinot.spi.data.Schema;
import static java.nio.charset.StandardCharsets.UTF_8;
@@ -83,6 +85,27 @@ public class StarTreeBuilderUtils {
return builderConfigs;
}
+ /**
+  * Generates the star-tree builder configs from the given star-tree index configs and, optionally, the default
+  * star-tree config derived from the schema and the segment metadata in JSON form.
+  *
+  * <p>Configs are returned in the order they are generated (index configs first, default config last). A config
+  * that is equal to one already in the list is not added again.
+  *
+  * @param indexConfigs star-tree index configs, may be {@code null}
+  * @param enableDefaultStarTree whether to add the default star-tree config
+  * @param schema schema passed to the default config generation
+  * @param segmentMetadata segment metadata as JSON; only its "columns" field is read, and only when
+  *                        {@code enableDefaultStarTree} is {@code true}
+  * @return list of builder configs without duplicates
+  * @throws IllegalStateException if the default star-tree is enabled but the segment metadata is {@code null} or has
+  *                               no "columns" field
+  */
+ public static List<StarTreeV2BuilderConfig> generateBuilderConfigs(@Nullable List<StarTreeIndexConfig> indexConfigs,
+     boolean enableDefaultStarTree, Schema schema, JsonNode segmentMetadata) {
+   List<StarTreeV2BuilderConfig> builderConfigs = new ArrayList<>();
+   if (indexConfigs != null) {
+     for (StarTreeIndexConfig indexConfig : indexConfigs) {
+       StarTreeV2BuilderConfig builderConfig = StarTreeV2BuilderConfig.fromIndexConfig(indexConfig);
+       if (!builderConfigs.contains(builderConfig)) {
+         builderConfigs.add(builderConfig);
+       }
+     }
+   }
+   if (enableDefaultStarTree) {
+     // JsonNode.get() returns a Java null (not a NullNode) for an absent field. Fail here with a descriptive
+     // message instead of letting the null surface later as an anonymous NullPointerException.
+     JsonNode columnsMetadata = segmentMetadata != null ? segmentMetadata.get("columns") : null;
+     if (columnsMetadata == null) {
+       throw new IllegalStateException(
+           "Segment metadata must contain the 'columns' field to generate the default star-tree config");
+     }
+     StarTreeV2BuilderConfig defaultConfig = StarTreeV2BuilderConfig.generateDefaultConfig(schema, columnsMetadata);
+     if (!builderConfigs.contains(defaultConfig)) {
+       builderConfigs.add(defaultConfig);
+     }
+   }
+   return builderConfigs;
+ }
+
/**
* Serialize the star-tree structure into a file.
*/
@@ -254,6 +277,25 @@ public class StarTreeBuilderUtils {
return false;
}
+ /**
+  * Checks whether two lists of star-tree builder configs match position by position.
+  *
+  * <p>The comparison is order-sensitive: the lists are considered equal only when they have the same size and the
+  * configs at every index are equal according to {@code equals()}.
+  *
+  * @param builderConfig1 first list of builder configs
+  * @param builderConfig2 second list of builder configs
+  * @return {@code true} if both lists hold equal configs in the same order, {@code false} otherwise
+  */
+ public static boolean areStarTreeBuilderConfigListsEqual(List<StarTreeV2BuilderConfig> builderConfig1,
+     List<StarTreeV2BuilderConfig> builderConfig2) {
+   if (builderConfig1.size() != builderConfig2.size()) {
+     return false;
+   }
+   // Sizes match, so walk the first list and look up the counterpart at the same position in the second list.
+   int position = 0;
+   for (StarTreeV2BuilderConfig left : builderConfig1) {
+     StarTreeV2BuilderConfig right = builderConfig2.get(position++);
+     if (!left.equals(right)) {
+       return false;
+     }
+   }
+   return true;
+ }
+
/**
* Removes all the star-trees from the given segment.
*/
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
index b11e3b2b24..eca08f8787 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
@@ -18,9 +18,11 @@
*/
package org.apache.pinot.segment.local.startree.v2.builder;
+import com.fasterxml.jackson.databind.JsonNode;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -189,6 +191,81 @@ public class StarTreeV2BuilderConfig {
DEFAULT_MAX_LEAF_RECORDS);
}
+ public static StarTreeV2BuilderConfig generateDefaultConfig(Schema schema, JsonNode columnsMetadata) {
+ List<JsonNode> dimensionColumnMetadataList = new ArrayList<>();
+ List<JsonNode> timeColumnMetadataList = new ArrayList<>();
+ List<String> numericMetrics = new ArrayList<>();
+ Preconditions.checkState(!columnsMetadata.isNull(), "columnsMetadata should not be null.");
+ Preconditions.checkState(!columnsMetadata.isEmpty(), "columnsMetadata should not be empty.");
+
+ // Convert columnsMetadata to a map for easy lookup.
+ Map<String, JsonNode> columnMetadataMap = convertJsonNodeToMap(columnsMetadata);
+
+ for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
+ if (!fieldSpec.isSingleValueField() || fieldSpec.isVirtualColumn()) {
+ continue;
+ }
+ String column = fieldSpec.getName();
+ switch (fieldSpec.getFieldType()) {
+ case DIMENSION:
+ JsonNode columnMetadata = columnMetadataMap.get(column);
+ if (columnMetadata.get("hasDictionary").asBoolean()
+ && columnMetadata.get("cardinality").asInt() <= DIMENSION_CARDINALITY_THRESHOLD_FOR_DEFAULT_CONFIG) {
+ dimensionColumnMetadataList.add(columnMetadata);
+ }
+ break;
+ case DATE_TIME:
+ case TIME:
+ columnMetadata = columnMetadataMap.get(column);
+ if (columnMetadata.get("hasDictionary").asBoolean()) {
+ timeColumnMetadataList.add(columnMetadata);
+ }
+ break;
+ case METRIC:
+ if (fieldSpec.getDataType().isNumeric()) {
+ numericMetrics.add(column);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ // Sort all dimensions/time columns with their cardinality in descending order
+ dimensionColumnMetadataList.sort(
+ (o1, o2) -> Integer.compare(o2.get("cardinality").asInt(), o1.get("cardinality").asInt()));
+ timeColumnMetadataList.sort(
+ (o1, o2) -> Integer.compare(o2.get("cardinality").asInt(), o1.get("cardinality").asInt()));
+
+ List<String> dimensionsSplitOrder = new ArrayList<>();
+ for (JsonNode dimensionColumnMetadata : dimensionColumnMetadataList) {
+ dimensionsSplitOrder.add(dimensionColumnMetadata.get("columnName").asText());
+ }
+ for (JsonNode timeColumnMetadata : timeColumnMetadataList) {
+ dimensionsSplitOrder.add(timeColumnMetadata.get("columnName").asText());
+ }
+ Preconditions.checkState(!dimensionsSplitOrder.isEmpty(), "No qualified dimension found for star-tree split order");
+
+ TreeMap<AggregationFunctionColumnPair, AggregationSpec> aggregationSpecs = new TreeMap<>();
+ aggregationSpecs.put(AggregationFunctionColumnPair.COUNT_STAR, AggregationSpec.DEFAULT);
+ for (String numericMetric : numericMetrics) {
+ aggregationSpecs.put(new AggregationFunctionColumnPair(AggregationFunctionType.SUM, numericMetric),
+ AggregationSpec.DEFAULT);
+ }
+
+ return new StarTreeV2BuilderConfig(dimensionsSplitOrder, Collections.emptySet(), aggregationSpecs,
+ DEFAULT_MAX_LEAF_RECORDS);
+ }
+
+ /**
+  * Indexes the child nodes of the given JSON node by column name.
+  *
+  * @param columnsMetadata JSON node whose child nodes each carry a "columnName" field
+  * @return map from the "columnName" text of each child node to the child node itself; when several child nodes
+  *         share a name, the last one iterated wins
+  */
+ public static Map<String, JsonNode> convertJsonNodeToMap(JsonNode columnsMetadata) {
+   Map<String, JsonNode> metadataByColumn = new HashMap<>();
+   columnsMetadata.forEach(node -> metadataByColumn.put(node.get("columnName").asText(), node));
+   return metadataByColumn;
+ }
+
private StarTreeV2BuilderConfig(List<String> dimensionsSplitOrder, Set<String> skipStarNodeCreationForDimensions,
TreeMap<AggregationFunctionColumnPair, AggregationSpec> aggregationSpecs, int maxLeafRecords) {
_dimensionsSplitOrder = dimensionsSplitOrder;
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeBuilderUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeBuilderUtilsTest.java
new file mode 100644
index 0000000000..71ee7c2251
--- /dev/null
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeBuilderUtilsTest.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.startree.v2.builder;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.pinot.segment.local.startree.StarTreeBuilderUtils;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.Constants;
+import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
+import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
+import org.apache.pinot.spi.config.table.StarTreeAggregationConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.TimeGranularitySpec;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+
+public class StarTreeBuilderUtilsTest {
+
+  /**
+   * Checks that {@code areStarTreeBuilderConfigListsEqual} reports a mismatch when the lists differ in size or in
+   * any single property of a config, and reports equality for identically configured star-trees.
+   */
+  @Test
+  public void testAreStarTreeBuilderConfigListsEqual() {
+    List<String> carrierThenDistance = Arrays.asList("Carrier", "Distance");
+    List<String> countStar = Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName());
+
+    // Baseline star-tree that every variant below is compared against.
+    StarTreeIndexConfig baseline = new StarTreeIndexConfig(carrierThenDistance, null, countStar, null, 100);
+    // Variant: different skip star node creation.
+    StarTreeIndexConfig skipStarNodeVariant =
+        new StarTreeIndexConfig(carrierThenDistance, Collections.singletonList("Distance"), countStar, null, 100);
+    // Variant: different dimension split order.
+    StarTreeIndexConfig splitOrderVariant =
+        new StarTreeIndexConfig(Arrays.asList("Distance", "Carrier"), null, countStar, null, 100);
+    // Variant: different max leaf records.
+    StarTreeIndexConfig maxLeafRecordsVariant =
+        new StarTreeIndexConfig(carrierThenDistance, null, countStar, null, 200);
+    // Variant: aggregation given as a StarTreeAggregationConfig instead of a function column pair.
+    StarTreeAggregationConfig maxDistance = new StarTreeAggregationConfig("Distance", "MAX", null);
+    StarTreeIndexConfig aggregationVariant =
+        new StarTreeIndexConfig(carrierThenDistance, null, null, Collections.singletonList(maxDistance), 100);
+    // Separate instance configured exactly like the baseline.
+    StarTreeIndexConfig baselineTwin = new StarTreeIndexConfig(carrierThenDistance, null, countStar, null, 100);
+
+    // Lists of different sizes are never equal.
+    assertFalse(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(
+        toBuilderConfigs(baseline, skipStarNodeVariant), toBuilderConfigs(splitOrderVariant)));
+
+    List<StarTreeV2BuilderConfig> baselineOnly = toBuilderConfigs(baseline);
+
+    // Different dimension split order.
+    assertFalse(
+        StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(baselineOnly, toBuilderConfigs(splitOrderVariant)));
+    // Different skip star node creation.
+    assertFalse(
+        StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(baselineOnly, toBuilderConfigs(skipStarNodeVariant)));
+    // Different max leaf records.
+    assertFalse(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(baselineOnly,
+        toBuilderConfigs(maxLeafRecordsVariant)));
+    // Different aggregation configs.
+    assertFalse(
+        StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(baselineOnly, toBuilderConfigs(aggregationVariant)));
+    // Equal configs.
+    assertTrue(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(baselineOnly, toBuilderConfigs(baselineTwin)));
+  }
+
+  /**
+   * Checks that generating builder configs from the JSON form of the segment metadata gives the same result as
+   * generating them from the mocked {@link SegmentMetadataImpl} describing the same columns.
+   */
+  @Test
+  public void testGenerateBuilderConfig() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("d1", FieldSpec.DataType.INT)
+        .addSingleValueDimension("d2", FieldSpec.DataType.LONG)
+        .addSingleValueDimension("d3", FieldSpec.DataType.FLOAT)
+        .addSingleValueDimension("d4", FieldSpec.DataType.DOUBLE)
+        .addMultiValueDimension("d5", FieldSpec.DataType.INT)
+        .addMetric("m1", FieldSpec.DataType.DOUBLE)
+        .addMetric("m2", FieldSpec.DataType.BYTES)
+        .addTime(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, "t"), null)
+        .addDateTime("dt", FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:HOURS")
+        .build();
+
+    SegmentMetadataImpl segmentMetadata = mock(SegmentMetadataImpl.class);
+    when(segmentMetadata.getSchema()).thenReturn(schema);
+    // Included
+    stubColumnMetadata(segmentMetadata, "d1", true, 200);
+    // Included
+    stubColumnMetadata(segmentMetadata, "d2", true, 400);
+    // Not included because the cardinality is too high
+    stubColumnMetadata(segmentMetadata, "d3", true, 20000);
+    // Not included because it is not dictionary-encoded
+    stubColumnMetadata(segmentMetadata, "d4", false, 100);
+    // Not included because it is multi-valued
+    stubColumnMetadata(segmentMetadata, "d5", true, 100);
+    // Included (metric does not have to be dictionary-encoded or have valid cardinality)
+    stubColumnMetadata(segmentMetadata, "m1", false, Constants.UNKNOWN_CARDINALITY);
+    // Not included because it is not numeric
+    stubColumnMetadata(segmentMetadata, "m2", true, 100);
+    // Included (do not check cardinality for time column)
+    stubColumnMetadata(segmentMetadata, "t", true, 20000);
+    // Included (do not check cardinality for date time column)
+    stubColumnMetadata(segmentMetadata, "dt", true, 30000);
+
+    // The same columns as rows of (column name, hasDictionary, cardinality) for the JSON form.
+    List<List<String>> columnRows = Arrays.asList(
+        Arrays.asList("d1", "true", "200"),
+        Arrays.asList("d2", "true", "400"),
+        Arrays.asList("d3", "true", "20000"),
+        Arrays.asList("d4", "false", "100"),
+        Arrays.asList("d5", "true", "100"),
+        Arrays.asList("m1", "false", "-1"),
+        Arrays.asList("m2", "true", "100"),
+        Arrays.asList("t", "true", "20000"),
+        Arrays.asList("dt", "true", "30000"));
+    JsonNode segmentMetadataJson = convertStringListToJsonNode(columnRows);
+
+    List<String> carrierThenDistance = Arrays.asList("Carrier", "Distance");
+    List<String> countStar = Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName());
+    StarTreeIndexConfig plainIndexConfig = new StarTreeIndexConfig(carrierThenDistance, null, countStar, null, 100);
+    StarTreeIndexConfig skipStarNodeIndexConfig =
+        new StarTreeIndexConfig(carrierThenDistance, Collections.singletonList("Distance"), countStar, null, 100);
+    List<StarTreeIndexConfig> indexConfigs = Arrays.asList(plainIndexConfig, skipStarNodeIndexConfig);
+
+    // Builder configs generated from the mocked SegmentMetadataImpl.
+    List<StarTreeV2BuilderConfig> fromSegmentMetadata =
+        StarTreeBuilderUtils.generateBuilderConfigs(indexConfigs, true, segmentMetadata);
+    // Builder configs generated from the schema and the JSON form.
+    List<StarTreeV2BuilderConfig> fromJson =
+        StarTreeBuilderUtils.generateBuilderConfigs(indexConfigs, true, schema, segmentMetadataJson);
+
+    // They should be equal.
+    assertEquals(fromSegmentMetadata, fromJson);
+  }
+
+  /**
+   * Converts the given index configs into a mutable list of builder configs, keeping their order.
+   */
+  private static List<StarTreeV2BuilderConfig> toBuilderConfigs(StarTreeIndexConfig... indexConfigs) {
+    List<StarTreeV2BuilderConfig> builderConfigs = new ArrayList<>();
+    for (StarTreeIndexConfig indexConfig : indexConfigs) {
+      builderConfigs.add(StarTreeV2BuilderConfig.fromIndexConfig(indexConfig));
+    }
+    return builderConfigs;
+  }
+
+  /**
+   * Makes the mocked segment metadata return a mocked column metadata with the given properties for the column.
+   */
+  private void stubColumnMetadata(SegmentMetadataImpl segmentMetadata, String column, boolean hasDictionary,
+      int cardinality) {
+    // Finish building the column mock before starting the stubbing of the segment metadata mock.
+    ColumnMetadata columnMetadata = getColumnMetadata(column, hasDictionary, cardinality);
+    when(segmentMetadata.getColumnMetadataFor(column)).thenReturn(columnMetadata);
+  }
+
+  /**
+   * Creates a mocked column metadata that reports the given name, dictionary flag and cardinality.
+   */
+  private ColumnMetadata getColumnMetadata(String column, boolean hasDictionary, int cardinality) {
+    ColumnMetadata mockedMetadata = mock(ColumnMetadata.class);
+    when(mockedMetadata.getColumnName()).thenReturn(column);
+    when(mockedMetadata.hasDictionary()).thenReturn(hasDictionary);
+    when(mockedMetadata.getCardinality()).thenReturn(cardinality);
+    return mockedMetadata;
+  }
+
+  /**
+   * Builds a JSON object whose "columns" field is an array with one object per row. Each row supplies the text
+   * values of "columnName", "hasDictionary" and "cardinality", in that order.
+   */
+  private JsonNode convertStringListToJsonNode(List<List<String>> columnList) {
+    ArrayNode columns = JsonNodeFactory.instance.arrayNode();
+    for (List<String> row : columnList) {
+      ObjectNode columnEntry = columns.addObject();
+      columnEntry.put("columnName", row.get(0));
+      columnEntry.put("hasDictionary", row.get(1));
+      columnEntry.put("cardinality", row.get(2));
+    }
+
+    // Wrap the array under the "columns" key of a root object.
+    ObjectNode root = JsonNodeFactory.instance.objectNode();
+    root.set("columns", columns);
+    return root;
+  }
+}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java
index 7004f901b3..726ee3feee 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java
@@ -18,6 +18,10 @@
*/
package org.apache.pinot.segment.local.startree.v2.builder;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
@@ -97,6 +101,39 @@ public class StarTreeV2BuilderConfigTest {
assertEquals(defaultConfig.getMaxLeafRecords(), StarTreeV2BuilderConfig.DEFAULT_MAX_LEAF_RECORDS);
}
+ /**
+  * Checks the default config generated from a schema and the per-column metadata given as a JSON array.
+  */
+ @Test
+ public void testDefaultConfigFromJsonNodeSegmentMetadata() {
+   Schema schema = new Schema.SchemaBuilder()
+       .addSingleValueDimension("d1", DataType.INT)
+       .addSingleValueDimension("d2", DataType.LONG)
+       .addSingleValueDimension("d3", DataType.FLOAT)
+       .addSingleValueDimension("d4", DataType.DOUBLE)
+       .addMultiValueDimension("d5", DataType.INT)
+       .addMetric("m1", DataType.DOUBLE)
+       .addMetric("m2", DataType.BYTES)
+       .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "t"), null)
+       .addDateTime("dt", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:HOURS")
+       .build();
+
+   // One row per column: column name, hasDictionary and cardinality.
+   List<List<String>> columnRows = Arrays.asList(
+       Arrays.asList("d1", "true", "200"),
+       Arrays.asList("d2", "true", "400"),
+       Arrays.asList("d3", "true", "20000"),
+       Arrays.asList("d4", "false", "100"),
+       Arrays.asList("d5", "true", "100"),
+       Arrays.asList("m1", "false", "-1"),
+       Arrays.asList("m2", "true", "100"),
+       Arrays.asList("t", "true", "20000"),
+       Arrays.asList("dt", "true", "30000"));
+
+   // Convert the rows to a JsonNode with appropriate key names.
+   JsonNode columnsMetadata = convertStringListToJsonNode(columnRows);
+
+   // Generate default config from the schema and the column metadata as JsonNode.
+   StarTreeV2BuilderConfig config = StarTreeV2BuilderConfig.generateDefaultConfig(schema, columnsMetadata);
+
+   // Dimensions sorted by cardinality in descending order, followed by the time columns sorted the same way
+   List<String> expectedSplitOrder = Arrays.asList("d2", "d1", "dt", "t");
+   assertEquals(config.getDimensionsSplitOrder(), expectedSplitOrder);
+
+   // No column should be skipped for star-node creation
+   assertTrue(config.getSkipStarNodeCreationForDimensions().isEmpty());
+
+   // Should have COUNT(*) and SUM(m1) as the function column pairs
+   AggregationFunctionColumnPair sumOfM1 = new AggregationFunctionColumnPair(AggregationFunctionType.SUM, "m1");
+   assertEquals(config.getFunctionColumnPairs(),
+       new HashSet<>(Arrays.asList(AggregationFunctionColumnPair.COUNT_STAR, sumOfM1)));
+
+   assertEquals(config.getMaxLeafRecords(), StarTreeV2BuilderConfig.DEFAULT_MAX_LEAF_RECORDS);
+ }
+
@Test
public void testBuildFromIndexConfig() {
List<StarTreeAggregationConfig> aggregationConfigs =
@@ -149,4 +186,17 @@ public class StarTreeV2BuilderConfigTest {
when(columnMetadata.getCardinality()).thenReturn(cardinality);
return columnMetadata;
}
+
+ /**
+  * Builds a JSON array with one object per row. Each row supplies the text values of "columnName",
+  * "hasDictionary" and "cardinality", in that order.
+  */
+ private JsonNode convertStringListToJsonNode(List<List<String>> columnList) {
+   ArrayNode columns = JsonNodeFactory.instance.arrayNode();
+   for (List<String> row : columnList) {
+     // addObject() appends a new empty object to the array and returns it for population.
+     ObjectNode columnEntry = columns.addObject();
+     columnEntry.put("columnName", row.get(0));
+     columnEntry.put("hasDictionary", row.get(1));
+     columnEntry.put("cardinality", row.get(2));
+   }
+   return columns;
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org