You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by sn...@apache.org on 2024/02/06 16:57:39 UTC

(pinot) branch master updated: Add Helper Functions in StarTreeBuilderUtils and StarTreeV2BuilderConfig (#12361)

This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new cad8c56d9d Add Helper Functions in StarTreeBuilderUtils and StarTreeV2BuilderConfig (#12361)
cad8c56d9d is described below

commit cad8c56d9d05d0b8e19a580f8652b6b2fb392fda
Author: aishikbh <ai...@startree.ai>
AuthorDate: Tue Feb 6 22:27:32 2024 +0530

    Add Helper Functions in StarTreeBuilderUtils and StarTreeV2BuilderConfig (#12361)
    
    * changed some stuffs
    
    * fixed an issue for generating default builder configs
    
    * optimize passing segment metadata
    
    * Add helper function and unit test
    
    * Added helper function to compare 2 StarTree builder configs.
    * Added unit test for generateDefaultConfig from JsonNode segment metadata.
    
    * add additional unit tests.
    
    * addressing comments
---
 .../local/startree/StarTreeBuilderUtils.java       |  42 ++++
 .../v2/builder/StarTreeV2BuilderConfig.java        |  77 ++++++++
 .../v2/builder/StarTreeBuilderUtilsTest.java       | 218 +++++++++++++++++++++
 .../v2/builder/StarTreeV2BuilderConfigTest.java    |  50 +++++
 4 files changed, 387 insertions(+)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java
index 59625678eb..e45eb0526d 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/StarTreeBuilderUtils.java
@@ -18,6 +18,7 @@
  */
 package org.apache.pinot.segment.local.startree;
 
+import com.fasterxml.jackson.databind.JsonNode;
 import java.io.File;
 import java.io.IOException;
 import java.nio.ByteOrder;
@@ -37,6 +38,7 @@ import org.apache.pinot.segment.spi.memory.PinotDataBuffer;
 import org.apache.pinot.segment.spi.store.SegmentDirectoryPaths;
 import org.apache.pinot.segment.spi.utils.SegmentMetadataUtils;
 import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
+import org.apache.pinot.spi.data.Schema;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 
@@ -83,6 +85,27 @@ public class StarTreeBuilderUtils {
     return builderConfigs;
   }
 
+  /**
+   * Generates the star-tree builder configs from the given index configs and, when enabled, a default config
+   * derived from the schema and the JSON form of the segment metadata. Duplicate configs are added only once.
+   *
+   * @param indexConfigs star-tree index configs to convert, may be {@code null}
+   * @param enableDefaultStarTree whether to also generate the default star-tree config
+   * @param schema schema used to generate the default config
+   * @param segmentMetadata JSON segment metadata; only its "columns" field is read, and only when
+   *                        {@code enableDefaultStarTree} is {@code true}
+   * @return the distinct builder configs in the order they were encountered
+   * @throws IllegalStateException if the default star-tree is enabled but "columns" is missing from the metadata
+   */
+  public static List<StarTreeV2BuilderConfig> generateBuilderConfigs(@Nullable List<StarTreeIndexConfig> indexConfigs,
+      boolean enableDefaultStarTree, Schema schema, JsonNode segmentMetadata) {
+    List<StarTreeV2BuilderConfig> builderConfigs = new ArrayList<>();
+    if (indexConfigs != null) {
+      for (StarTreeIndexConfig indexConfig : indexConfigs) {
+        StarTreeV2BuilderConfig builderConfig = StarTreeV2BuilderConfig.fromIndexConfig(indexConfig);
+        if (!builderConfigs.contains(builderConfig)) {
+          builderConfigs.add(builderConfig);
+        }
+      }
+    }
+    if (enableDefaultStarTree) {
+      // JsonNode.get() returns null for a missing field; fail here with a clear message instead of letting a bare
+      // NullPointerException surface further down.
+      JsonNode columnsMetadata = segmentMetadata.get("columns");
+      if (columnsMetadata == null) {
+        throw new IllegalStateException("Segment metadata does not contain the 'columns' field");
+      }
+      StarTreeV2BuilderConfig defaultConfig = StarTreeV2BuilderConfig.generateDefaultConfig(schema, columnsMetadata);
+      if (!builderConfigs.contains(defaultConfig)) {
+        builderConfigs.add(defaultConfig);
+      }
+    }
+    return builderConfigs;
+  }
+
   /**
    * Serialize the star-tree structure into a file.
    */
@@ -254,6 +277,25 @@ public class StarTreeBuilderUtils {
     return false;
   }
 
+  /**
+   * Returns {@code true} if the given star-tree builder config lists are equal, {@code false} otherwise.
+   * <p>The comparison is order-sensitive: both lists must have the same size and hold equal configs at every index.
+   */
+  public static boolean areStarTreeBuilderConfigListsEqual(List<StarTreeV2BuilderConfig> builderConfig1,
+      List<StarTreeV2BuilderConfig> builderConfig2) {
+    // List.equals() already performs the size check plus the pairwise element comparison, without index-based
+    // access and without failing on null elements.
+    return builderConfig1.equals(builderConfig2);
+  }
+
   /**
    * Removes all the star-trees from the given segment.
    */
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
index b11e3b2b24..eca08f8787 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfig.java
@@ -18,9 +18,11 @@
  */
 package org.apache.pinot.segment.local.startree.v2.builder;
 
+import com.fasterxml.jackson.databind.JsonNode;
 import com.google.common.base.Preconditions;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -189,6 +191,81 @@ public class StarTreeV2BuilderConfig {
         DEFAULT_MAX_LEAF_RECORDS);
   }
 
+  /**
+   * Generates the default star-tree builder config from the schema and the JSON form of the columns metadata.
+   * <p>Only single-value, non-virtual columns of the schema are considered:
+   * <ul>
+   *   <li>DIMENSION: kept when dictionary-encoded and the cardinality does not exceed
+   *   {@code DIMENSION_CARDINALITY_THRESHOLD_FOR_DEFAULT_CONFIG}</li>
+   *   <li>DATE_TIME / TIME: kept when dictionary-encoded (cardinality is not checked)</li>
+   *   <li>METRIC: numeric metrics are aggregated with SUM, in addition to COUNT(*)</li>
+   * </ul>
+   * The split order lists the dimensions first, then the time columns, each group sorted by cardinality in
+   * descending order.
+   *
+   * @param schema schema whose field specs decide which columns are considered
+   * @param columnsMetadata per-column metadata nodes, each read through its "columnName", "hasDictionary" and
+   *                        "cardinality" fields
+   * @return the default builder config with no skipped star-node dimensions and {@code DEFAULT_MAX_LEAF_RECORDS}
+   * @throws IllegalStateException if the metadata is null or empty, if a dimension or time column of the schema has
+   *                               no metadata entry, or if no column qualifies for the split order
+   */
+  public static StarTreeV2BuilderConfig generateDefaultConfig(Schema schema, JsonNode columnsMetadata) {
+    // Guard against both a Java null (e.g. a missing field returned by JsonNode.get()) and a JSON null node.
+    Preconditions.checkState(columnsMetadata != null && !columnsMetadata.isNull(),
+        "columnsMetadata should not be null.");
+    Preconditions.checkState(!columnsMetadata.isEmpty(), "columnsMetadata should not be empty.");
+
+    // Convert columnsMetadata to a map for easy lookup.
+    Map<String, JsonNode> columnMetadataMap = convertJsonNodeToMap(columnsMetadata);
+
+    List<JsonNode> dimensionColumnMetadataList = new ArrayList<>();
+    List<JsonNode> timeColumnMetadataList = new ArrayList<>();
+    List<String> numericMetrics = new ArrayList<>();
+    for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
+      if (!fieldSpec.isSingleValueField() || fieldSpec.isVirtualColumn()) {
+        continue;
+      }
+      String column = fieldSpec.getName();
+      switch (fieldSpec.getFieldType()) {
+        case DIMENSION:
+          JsonNode columnMetadata = columnMetadataMap.get(column);
+          // A schema column without a metadata entry would otherwise fail below with an unexplained NPE.
+          Preconditions.checkState(columnMetadata != null, "Failed to find metadata for column: %s", column);
+          if (columnMetadata.get("hasDictionary").asBoolean()
+              && columnMetadata.get("cardinality").asInt() <= DIMENSION_CARDINALITY_THRESHOLD_FOR_DEFAULT_CONFIG) {
+            dimensionColumnMetadataList.add(columnMetadata);
+          }
+          break;
+        case DATE_TIME:
+        case TIME:
+          columnMetadata = columnMetadataMap.get(column);
+          Preconditions.checkState(columnMetadata != null, "Failed to find metadata for column: %s", column);
+          if (columnMetadata.get("hasDictionary").asBoolean()) {
+            timeColumnMetadataList.add(columnMetadata);
+          }
+          break;
+        case METRIC:
+          // Metrics are selected from the schema alone; their column metadata is not consulted.
+          if (fieldSpec.getDataType().isNumeric()) {
+            numericMetrics.add(column);
+          }
+          break;
+        default:
+          break;
+      }
+    }
+
+    // Sort all dimensions/time columns with their cardinality in descending order
+    dimensionColumnMetadataList.sort(
+        (o1, o2) -> Integer.compare(o2.get("cardinality").asInt(), o1.get("cardinality").asInt()));
+    timeColumnMetadataList.sort(
+        (o1, o2) -> Integer.compare(o2.get("cardinality").asInt(), o1.get("cardinality").asInt()));
+
+    // Dimensions come first in the split order, followed by the time columns.
+    List<String> dimensionsSplitOrder = new ArrayList<>();
+    for (JsonNode dimensionColumnMetadata : dimensionColumnMetadataList) {
+      dimensionsSplitOrder.add(dimensionColumnMetadata.get("columnName").asText());
+    }
+    for (JsonNode timeColumnMetadata : timeColumnMetadataList) {
+      dimensionsSplitOrder.add(timeColumnMetadata.get("columnName").asText());
+    }
+    Preconditions.checkState(!dimensionsSplitOrder.isEmpty(), "No qualified dimension found for star-tree split order");
+
+    TreeMap<AggregationFunctionColumnPair, AggregationSpec> aggregationSpecs = new TreeMap<>();
+    aggregationSpecs.put(AggregationFunctionColumnPair.COUNT_STAR, AggregationSpec.DEFAULT);
+    for (String numericMetric : numericMetrics) {
+      aggregationSpecs.put(new AggregationFunctionColumnPair(AggregationFunctionType.SUM, numericMetric),
+          AggregationSpec.DEFAULT);
+    }
+
+    return new StarTreeV2BuilderConfig(dimensionsSplitOrder, Collections.emptySet(), aggregationSpecs,
+        DEFAULT_MAX_LEAF_RECORDS);
+  }
+
+  /**
+   * Indexes the given per-column metadata nodes by the text of their "columnName" field.
+   * If several nodes carry the same column name, the last one iterated is kept.
+   */
+  public static Map<String, JsonNode> convertJsonNodeToMap(JsonNode columnsMetadata) {
+    Map<String, JsonNode> metadataByColumn = new HashMap<>();
+    columnsMetadata.forEach(node -> metadataByColumn.put(node.get("columnName").asText(), node));
+    return metadataByColumn;
+  }
+
   private StarTreeV2BuilderConfig(List<String> dimensionsSplitOrder, Set<String> skipStarNodeCreationForDimensions,
       TreeMap<AggregationFunctionColumnPair, AggregationSpec> aggregationSpecs, int maxLeafRecords) {
     _dimensionsSplitOrder = dimensionsSplitOrder;
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeBuilderUtilsTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeBuilderUtilsTest.java
new file mode 100644
index 0000000000..71ee7c2251
--- /dev/null
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeBuilderUtilsTest.java
@@ -0,0 +1,218 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.startree.v2.builder;
+
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import org.apache.pinot.segment.local.startree.StarTreeBuilderUtils;
+import org.apache.pinot.segment.spi.ColumnMetadata;
+import org.apache.pinot.segment.spi.Constants;
+import org.apache.pinot.segment.spi.index.metadata.SegmentMetadataImpl;
+import org.apache.pinot.segment.spi.index.startree.AggregationFunctionColumnPair;
+import org.apache.pinot.spi.config.table.StarTreeAggregationConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.data.TimeGranularitySpec;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.AssertJUnit.assertEquals;
+import static org.testng.AssertJUnit.assertFalse;
+import static org.testng.AssertJUnit.assertTrue;
+
+
+public class StarTreeBuilderUtilsTest {
+  /**
+   * Verifies that {@code areStarTreeBuilderConfigListsEqual} returns {@code false} when the lists differ in size,
+   * dimension split order, skip-star-node dimensions, max leaf records or aggregation configs, and {@code true}
+   * when both lists hold equal builder configs.
+   */
+  @Test
+  public void testAreStarTreeBuilderConfigListsEqual() {
+    // Create StarTreeIndexConfigs to test for unequal star-tree configs.
+    StarTreeIndexConfig starTreeIndexConfig1 = new StarTreeIndexConfig(Arrays.asList("Carrier", "Distance"), null,
+        Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName()), null, 100);
+
+    // Different skip star node creation.
+    StarTreeIndexConfig starTreeIndexConfig2 =
+        new StarTreeIndexConfig(Arrays.asList("Carrier", "Distance"), Collections.singletonList("Distance"),
+            Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName()), null, 100);
+
+    // Different dimension split order.
+    StarTreeIndexConfig starTreeIndexConfig3 = new StarTreeIndexConfig(Arrays.asList("Distance", "Carrier"), null,
+        Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName()), null, 100);
+
+    // Different max leaf records.
+    StarTreeIndexConfig starTreeIndexConfig4 = new StarTreeIndexConfig(Arrays.asList("Carrier", "Distance"), null,
+        Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName()), null, 200);
+
+    // Create a StarTreeAggregationConfig (MAX on Distance).
+    StarTreeAggregationConfig starTreeAggregationConfig1 = new StarTreeAggregationConfig("Distance", "MAX", null);
+
+    // Different AggregationConfig.
+    StarTreeIndexConfig starTreeIndexConfig5 = new StarTreeIndexConfig(Arrays.asList("Carrier", "Distance"), null, null,
+        Collections.singletonList(starTreeAggregationConfig1), 100);
+
+    // Create StarTreeIndexConfig for equality check (same arguments as starTreeIndexConfig1).
+    StarTreeIndexConfig starTreeIndexConfig6 = new StarTreeIndexConfig(Arrays.asList("Carrier", "Distance"), null,
+        Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName()), null, 100);
+
+    // Test unequal builder config list sizes.
+    List<StarTreeV2BuilderConfig> config1 = new ArrayList<>();
+    List<StarTreeV2BuilderConfig> config2 = new ArrayList<>();
+
+    // Add two StarTreeV2BuilderConfigs to config1 and one to config2.
+    config1.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig1));
+    config1.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig2));
+
+    config2.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig3));
+    assertFalse(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(config1, config2));
+
+    // Test different dimension split order in StarTreeV2BuilderConfig (config2 still holds starTreeIndexConfig3).
+    config1.clear();
+    config1.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig1));
+
+    assertFalse(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(config1, config2));
+
+    // Test different skip star node creation in StarTreeV2BuilderConfig.
+    config2.clear();
+    config2.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig2));
+
+    assertFalse(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(config1, config2));
+
+    // Test different max leaf records in StarTreeV2BuilderConfig.
+    config2.clear();
+    config2.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig4));
+
+    assertFalse(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(config1, config2));
+
+    // Test different aggregation configs in StarTreeV2BuilderConfig.
+    config2.clear();
+    config2.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig5));
+
+    assertFalse(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(config1, config2));
+
+    // Test equal StarTreeV2BuilderConfigs.
+    config2.clear();
+    config2.add(StarTreeV2BuilderConfig.fromIndexConfig(starTreeIndexConfig6));
+
+    assertTrue(StarTreeBuilderUtils.areStarTreeBuilderConfigListsEqual(config1, config2));
+  }
+
+  /**
+   * Verifies that generating builder configs from a mocked {@link SegmentMetadataImpl} and from the equivalent
+   * JSON segment metadata (plus schema) yields equal lists, with the default star-tree enabled.
+   */
+  @Test
+  public void testGenerateBuilderConfig() {
+
+    // Create Schema and SegmentMetadata for testing.
+    Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("d1", FieldSpec.DataType.INT)
+        .addSingleValueDimension("d2", FieldSpec.DataType.LONG).addSingleValueDimension("d3", FieldSpec.DataType.FLOAT)
+        .addSingleValueDimension("d4", FieldSpec.DataType.DOUBLE).addMultiValueDimension("d5", FieldSpec.DataType.INT)
+        .addMetric("m1", FieldSpec.DataType.DOUBLE).addMetric("m2", FieldSpec.DataType.BYTES)
+        .addTime(new TimeGranularitySpec(FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS, "t"), null)
+        .addDateTime("dt", FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:HOURS").build();
+    SegmentMetadataImpl segmentMetadata = mock(SegmentMetadataImpl.class);
+    when(segmentMetadata.getSchema()).thenReturn(schema);
+    // Included
+    ColumnMetadata columnMetadata = getColumnMetadata("d1", true, 200);
+    when(segmentMetadata.getColumnMetadataFor("d1")).thenReturn(columnMetadata);
+    // Included
+    columnMetadata = getColumnMetadata("d2", true, 400);
+    when(segmentMetadata.getColumnMetadataFor("d2")).thenReturn(columnMetadata);
+    // Not included because the cardinality is too high
+    columnMetadata = getColumnMetadata("d3", true, 20000);
+    when(segmentMetadata.getColumnMetadataFor("d3")).thenReturn(columnMetadata);
+    // Not included because it is not dictionary-encoded
+    columnMetadata = getColumnMetadata("d4", false, 100);
+    when(segmentMetadata.getColumnMetadataFor("d4")).thenReturn(columnMetadata);
+    // Not included because it is multi-valued
+    columnMetadata = getColumnMetadata("d5", true, 100);
+    when(segmentMetadata.getColumnMetadataFor("d5")).thenReturn(columnMetadata);
+    // Included (metric does not have to be dictionary-encoded or have valid cardinality)
+    columnMetadata = getColumnMetadata("m1", false, Constants.UNKNOWN_CARDINALITY);
+    when(segmentMetadata.getColumnMetadataFor("m1")).thenReturn(columnMetadata);
+    // Not included because it is not numeric
+    columnMetadata = getColumnMetadata("m2", true, 100);
+    when(segmentMetadata.getColumnMetadataFor("m2")).thenReturn(columnMetadata);
+    // Included (do not check cardinality for time column)
+    columnMetadata = getColumnMetadata("t", true, 20000);
+    when(segmentMetadata.getColumnMetadataFor("t")).thenReturn(columnMetadata);
+    // Included (do not check cardinality for date time column)
+    columnMetadata = getColumnMetadata("dt", true, 30000);
+    when(segmentMetadata.getColumnMetadataFor("dt")).thenReturn(columnMetadata);
+
+    // Create a list of rows holding column name, hasDictionary and cardinality, mirroring the mocks above.
+    List<List<String>> columnList =
+        Arrays.asList(Arrays.asList("d1", "true", "200"), Arrays.asList("d2", "true", "400"),
+            Arrays.asList("d3", "true", "20000"), Arrays.asList("d4", "false", "100"),
+            Arrays.asList("d5", "true", "100"), Arrays.asList("m1", "false", "-1"), Arrays.asList("m2", "true", "100"),
+            Arrays.asList("t", "true", "20000"), Arrays.asList("dt", "true", "30000"));
+
+    // Convert the list of rows to a JsonNode with appropriate key names and root node.
+    JsonNode segmentMetadataAsJsonNode = convertStringListToJsonNode(columnList);
+
+    // Create StarTreeIndexConfigs for testing.
+    StarTreeIndexConfig starTreeIndexConfig1 = new StarTreeIndexConfig(Arrays.asList("Carrier", "Distance"), null,
+        Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName()), null, 100);
+
+    StarTreeIndexConfig starTreeIndexConfig2 =
+        new StarTreeIndexConfig(Arrays.asList("Carrier", "Distance"), Collections.singletonList("Distance"),
+            Collections.singletonList(AggregationFunctionColumnPair.COUNT_STAR.toColumnName()), null, 100);
+
+    // Create StarTreeV2BuilderConfigs from the mocked SegmentMetadataImpl.
+    List<StarTreeV2BuilderConfig> builderConfig1 =
+        StarTreeBuilderUtils.generateBuilderConfigs(Arrays.asList(starTreeIndexConfig1, starTreeIndexConfig2), true,
+            segmentMetadata);
+
+    // Create StarTreeV2BuilderConfigs from the schema and the JsonNode.
+    List<StarTreeV2BuilderConfig> builderConfig2 =
+        StarTreeBuilderUtils.generateBuilderConfigs(Arrays.asList(starTreeIndexConfig1, starTreeIndexConfig2), true,
+            schema, segmentMetadataAsJsonNode);
+
+    // They should be equal.
+    assertEquals(builderConfig1, builderConfig2);
+  }
+
+  /**
+   * Builds a mocked {@link ColumnMetadata} that reports the given column name, dictionary flag and cardinality.
+   */
+  private ColumnMetadata getColumnMetadata(String column, boolean hasDictionary, int cardinality) {
+    ColumnMetadata mockedMetadata = mock(ColumnMetadata.class);
+    when(mockedMetadata.getCardinality()).thenReturn(cardinality);
+    when(mockedMetadata.hasDictionary()).thenReturn(hasDictionary);
+    when(mockedMetadata.getColumnName()).thenReturn(column);
+    return mockedMetadata;
+  }
+
+  /**
+   * Builds a JSON segment metadata node of the form {@code {"columns": [...]}} from the given rows.
+   *
+   * @param columnList rows of {@code [columnName, hasDictionary, cardinality]}, all given as strings
+   * @return root object node whose "columns" field holds one object per row
+   */
+  private JsonNode convertStringListToJsonNode(List<List<String>> columnList) {
+    JsonNodeFactory nodeFactory = JsonNodeFactory.instance;
+
+    // Create arrayNode from the list of rows.
+    ArrayNode arrayNode = nodeFactory.arrayNode();
+    for (List<String> column : columnList) {
+      ObjectNode objectNode = nodeFactory.objectNode();
+      objectNode.put("columnName", column.get(0));
+      // Store typed values (boolean / int) so the code under test reads a BooleanNode / IntNode instead of relying
+      // on Jackson's lenient text-to-boolean / text-to-int coercion.
+      objectNode.put("hasDictionary", Boolean.parseBoolean(column.get(1)));
+      objectNode.put("cardinality", Integer.parseInt(column.get(2)));
+      arrayNode.add(objectNode);
+    }
+
+    // Set the rootNode key as "columns" and value as arrayNode.
+    ObjectNode rootNode = nodeFactory.objectNode();
+    rootNode.set("columns", arrayNode);
+    return rootNode;
+  }
+}
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java
index 7004f901b3..726ee3feee 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/startree/v2/builder/StarTreeV2BuilderConfigTest.java
@@ -18,6 +18,10 @@
  */
 package org.apache.pinot.segment.local.startree.v2.builder;
 
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.JsonNodeFactory;
+import com.fasterxml.jackson.databind.node.ObjectNode;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
@@ -97,6 +101,39 @@ public class StarTreeV2BuilderConfigTest {
     assertEquals(defaultConfig.getMaxLeafRecords(), StarTreeV2BuilderConfig.DEFAULT_MAX_LEAF_RECORDS);
   }
 
+  /**
+   * Verifies the default config generated from a schema plus the JSON form of the columns metadata.
+   */
+  @Test
+  public void testDefaultConfigFromJsonNodeSegmentMetadata() {
+    // Rows of [columnName, hasDictionary, cardinality], converted to a JsonNode with appropriate key names.
+    List<List<String>> columnRows = Arrays.asList(
+        Arrays.asList("d1", "true", "200"),
+        Arrays.asList("d2", "true", "400"),
+        Arrays.asList("d3", "true", "20000"),
+        Arrays.asList("d4", "false", "100"),
+        Arrays.asList("d5", "true", "100"),
+        Arrays.asList("m1", "false", "-1"),
+        Arrays.asList("m2", "true", "100"),
+        Arrays.asList("t", "true", "20000"),
+        Arrays.asList("dt", "true", "30000"));
+    JsonNode columnsMetadata = convertStringListToJsonNode(columnRows);
+
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("d1", DataType.INT)
+        .addSingleValueDimension("d2", DataType.LONG)
+        .addSingleValueDimension("d3", DataType.FLOAT)
+        .addSingleValueDimension("d4", DataType.DOUBLE)
+        .addMultiValueDimension("d5", DataType.INT)
+        .addMetric("m1", DataType.DOUBLE)
+        .addMetric("m2", DataType.BYTES)
+        .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, "t"), null)
+        .addDateTime("dt", DataType.LONG, "1:MILLISECONDS:EPOCH", "1:HOURS")
+        .build();
+
+    // Generate the default config from the schema and the columns metadata as JsonNode.
+    StarTreeV2BuilderConfig config = StarTreeV2BuilderConfig.generateDefaultConfig(schema, columnsMetadata);
+
+    // Dimensions sorted by cardinality in descending order, followed by the time columns in the same ordering
+    List<String> expectedSplitOrder = Arrays.asList("d2", "d1", "dt", "t");
+    assertEquals(config.getDimensionsSplitOrder(), expectedSplitOrder);
+
+    // No column should be skipped for star-node creation
+    assertTrue(config.getSkipStarNodeCreationForDimensions().isEmpty());
+
+    // Should have COUNT(*) and SUM(m1) as the function column pairs
+    AggregationFunctionColumnPair sumOfM1 = new AggregationFunctionColumnPair(AggregationFunctionType.SUM, "m1");
+    assertEquals(config.getFunctionColumnPairs(),
+        new HashSet<>(Arrays.asList(AggregationFunctionColumnPair.COUNT_STAR, sumOfM1)));
+
+    assertEquals(config.getMaxLeafRecords(), StarTreeV2BuilderConfig.DEFAULT_MAX_LEAF_RECORDS);
+  }
+
   @Test
   public void testBuildFromIndexConfig() {
     List<StarTreeAggregationConfig> aggregationConfigs =
@@ -149,4 +186,17 @@ public class StarTreeV2BuilderConfigTest {
     when(columnMetadata.getCardinality()).thenReturn(cardinality);
     return columnMetadata;
   }
+
+  /**
+   * Builds a JSON array of per-column metadata objects from the given rows.
+   *
+   * @param columnList rows of {@code [columnName, hasDictionary, cardinality]}, all given as strings
+   * @return array node holding one object per row with "columnName", "hasDictionary" and "cardinality" fields
+   */
+  private JsonNode convertStringListToJsonNode(List<List<String>> columnList) {
+    JsonNodeFactory nodeFactory = JsonNodeFactory.instance;
+
+    // Create arrayNode from the list of rows.
+    ArrayNode arrayNode = nodeFactory.arrayNode();
+    for (List<String> column : columnList) {
+      ObjectNode objectNode = nodeFactory.objectNode();
+      objectNode.put("columnName", column.get(0));
+      // Store typed values (boolean / int) so the code under test reads a BooleanNode / IntNode instead of relying
+      // on Jackson's lenient text-to-boolean / text-to-int coercion.
+      objectNode.put("hasDictionary", Boolean.parseBoolean(column.get(1)));
+      objectNode.put("cardinality", Integer.parseInt(column.get(2)));
+      arrayNode.add(objectNode);
+    }
+    return arrayNode;
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org