You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ha...@apache.org on 2020/09/18 04:03:02 UTC
[incubator-pinot] branch master updated: Table indexing config
validation (#6017)
This is an automated email from the ASF dual-hosted git repository.
haibow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 5548e79 Table indexing config validation (#6017)
5548e79 is described below
commit 5548e791f9273ec9ffd74f237385c9e7a531f9f3
Author: icefury71 <ch...@gmail.com>
AuthorDate: Thu Sep 17 21:02:48 2020 -0700
Table indexing config validation (#6017)
* Adding validation for table indexing config to check for valid column names
* * Addressing review comments: adding validation for FieldConfigList. Adding javadocs
* Bug fix in integration test regarding removal of existing column from schema
* Adding validation for StarTreeIndexingConfig
* Addressing review comments
---
.../apache/pinot/core/util/TableConfigUtils.java | 114 ++++++++++++++-
.../pinot/core/util/TableConfigUtilsTest.java | 154 +++++++++++++++++++++
...ulls_default_column_test_missing_columns.schema | 4 +-
.../spi/utils/builder/TableConfigBuilder.java | 15 ++
4 files changed, 282 insertions(+), 5 deletions(-)
diff --git a/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java
index dfe93a3..c91c7f9 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java
@@ -20,8 +20,10 @@ package org.apache.pinot.core.util;
import com.google.common.base.Preconditions;
import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
+import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.pinot.common.tier.TierFactory;
@@ -29,8 +31,12 @@ import org.apache.pinot.common.utils.CommonConstants;
import org.apache.pinot.common.utils.config.TagNameUtils;
import org.apache.pinot.core.data.function.FunctionEvaluator;
import org.apache.pinot.core.data.function.FunctionEvaluatorFactory;
+import org.apache.pinot.core.startree.v2.AggregationFunctionColumnPair;
+import org.apache.pinot.spi.config.table.FieldConfig;
+import org.apache.pinot.spi.config.table.IndexingConfig;
import org.apache.pinot.spi.config.table.IngestionConfig;
import org.apache.pinot.spi.config.table.SegmentsValidationAndRetentionConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.config.table.TierConfig;
@@ -55,8 +61,9 @@ public final class TableConfigUtils {
* 1. Validation config
* 2. IngestionConfig
* 3. TierConfigs
+ * 4. Indexing config
*
- * TODO: Add more validations for each section (e.g. verify column names used in the indexing, validate conditions are met for aggregateMetrics etc)
+ * TODO: Add more validations for each section (e.g. validate conditions are met for aggregateMetrics)
*/
public static void validate(TableConfig tableConfig, @Nullable Schema schema) {
if (tableConfig.getTableType() == TableType.REALTIME) {
@@ -65,6 +72,8 @@ public final class TableConfigUtils {
validateValidationConfig(tableConfig, schema);
validateIngestionConfig(tableConfig.getIngestionConfig(), schema);
validateTierConfigList(tableConfig.getTierConfigsList());
+ validateIndexingConfig(tableConfig.getIndexingConfig(), schema);
+ validateFieldConfigList(tableConfig.getFieldConfigList(), schema);
}
/**
@@ -75,8 +84,8 @@ public final class TableConfigUtils {
*/
public static void validateTableName(TableConfig tableConfig) {
String tableName = tableConfig.getTableName();
- if (tableName.contains(".")) {
- throw new IllegalStateException("Table name: '" + tableName + "' containing '.' is not allowed");
+ if (tableName.contains(".") || tableName.contains(" ")) {
+ throw new IllegalStateException("Table name: '" + tableName + "' containing '.' or space is not allowed");
}
}
@@ -225,4 +234,103 @@ public final class TableConfigUtils {
}
}
}
+
+ /**
+ * Validates the Indexing Config
+ * Ensures that every referred column name exists in the corresponding schema
+ */
+ private static void validateIndexingConfig(@Nullable IndexingConfig indexingConfig, @Nullable Schema schema) {
+ if (indexingConfig == null || schema == null) {
+ return;
+ }
+ Map<String, String> columnNameToConfigMap = new HashMap<>();
+
+ if (indexingConfig.getBloomFilterColumns() != null) {
+ for (String columnName : indexingConfig.getBloomFilterColumns()) {
+ columnNameToConfigMap.put(columnName, "Bloom Filter Config");
+ }
+ }
+ if (indexingConfig.getInvertedIndexColumns() != null) {
+ for (String columnName : indexingConfig.getInvertedIndexColumns()) {
+ columnNameToConfigMap.put(columnName, "Inverted Index Config");
+ }
+ }
+ if (indexingConfig.getNoDictionaryColumns() != null) {
+ for (String columnName : indexingConfig.getNoDictionaryColumns()) {
+ columnNameToConfigMap.put(columnName, "No Dictionary Column Config");
+ }
+ }
+ if (indexingConfig.getOnHeapDictionaryColumns() != null) {
+ for (String columnName : indexingConfig.getOnHeapDictionaryColumns()) {
+ columnNameToConfigMap.put(columnName, "On Heap Dictionary Column Config");
+ }
+ }
+ if (indexingConfig.getRangeIndexColumns() != null) {
+ for (String columnName : indexingConfig.getRangeIndexColumns()) {
+ columnNameToConfigMap.put(columnName, "Range Column Config");
+ }
+ }
+ if (indexingConfig.getSortedColumn() != null) {
+ for (String columnName : indexingConfig.getSortedColumn()) {
+ columnNameToConfigMap.put(columnName, "Sorted Column Config");
+ }
+ }
+ if (indexingConfig.getVarLengthDictionaryColumns() != null) {
+ for (String columnName : indexingConfig.getVarLengthDictionaryColumns()) {
+ columnNameToConfigMap.put(columnName, "Var Length Column Config");
+ }
+ }
+ List<StarTreeIndexConfig> starTreeIndexConfigList = indexingConfig.getStarTreeIndexConfigs();
+ if (starTreeIndexConfigList != null) {
+ for (StarTreeIndexConfig starTreeIndexConfig : starTreeIndexConfigList) {
+ // Dimension split order cannot be null
+ for (String columnName : starTreeIndexConfig.getDimensionsSplitOrder()) {
+ columnNameToConfigMap.put(columnName, "StarTreeIndex Config");
+ }
+ // Function column pairs cannot be null
+ for (String functionColumnPair : starTreeIndexConfig.getFunctionColumnPairs()) {
+ AggregationFunctionColumnPair columnPair;
+ try {
+ columnPair = AggregationFunctionColumnPair.fromColumnName(functionColumnPair);
+ } catch (Exception e) {
+ throw new IllegalStateException("Invalid StarTreeIndex config: " + functionColumnPair + ". Must be"
+ + "in the form <Aggregation function>__<Column name>");
+ }
+ String columnName = columnPair.getColumn();
+ if (!columnName.equals(AggregationFunctionColumnPair.STAR)) {
+ columnNameToConfigMap.put(columnName, "StarTreeIndex Config");
+ }
+ }
+ List<String> skipDimensionList = starTreeIndexConfig.getSkipStarNodeCreationForDimensions();
+ if (skipDimensionList != null) {
+ for (String columnName : skipDimensionList) {
+ columnNameToConfigMap.put(columnName, "StarTreeIndex Config");
+ }
+ }
+ }
+ }
+
+ for (Map.Entry<String, String> entry : columnNameToConfigMap.entrySet()) {
+ String columnName = entry.getKey();
+ String configName = entry.getValue();
+ Preconditions.checkState(schema.getFieldSpecFor(columnName) != null,
+ "Column Name " + columnName + " defined in " + configName + " must be a valid column defined in the schema");
+ }
+ }
+
+ /**
+ * Validates the Field Config List in the given TableConfig
+ * Ensures that every referred column name exists in the corresponding schema
+ */
+ private static void validateFieldConfigList(@Nullable List<FieldConfig> fieldConfigList, @Nullable Schema schema) {
+ if (fieldConfigList == null || schema == null) {
+ return;
+ }
+
+ for (FieldConfig fieldConfig : fieldConfigList) {
+ String columnName = fieldConfig.getName();
+ Preconditions.checkState(schema.getFieldSpecFor(columnName) != null,
+ "Column Name " + columnName + " defined in field config list must be a valid column defined in the schema");
+ }
+ }
}
diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java
index 0634589..32e658e 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java
@@ -19,9 +19,12 @@
package org.apache.pinot.core.util;
import com.google.common.collect.Lists;
+import java.util.Arrays;
import java.util.Collections;
import org.apache.pinot.common.tier.TierFactory;
+import org.apache.pinot.spi.config.table.FieldConfig;
import org.apache.pinot.spi.config.table.IngestionConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
import org.apache.pinot.spi.config.table.TierConfig;
@@ -456,4 +459,155 @@ public class TableConfigUtilsTest {
// expected
}
}
+
+ @Test
+ public void testTableName() {
+ String[] malformedTableName = {"test.table", "test table"};
+ for (int i = 0; i < 2; i++) {
+ String tableName = malformedTableName[i];
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName).build();
+ try {
+ TableConfigUtils.validateTableName(tableConfig);
+ Assert.fail("Should fail for malformed table name : " + tableName);
+ } catch (IllegalStateException e) {
+ // expected
+ }
+ }
+ }
+
+ @Test
+ public void testValidateIndexingConfig() {
+ Schema schema =
+ new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension("myCol", FieldSpec.DataType.STRING)
+ .build();
+ TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setBloomFilterColumns(Arrays.asList("myCol2")).build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid Bloom filter column name");
+ } catch (Exception e) {
+ // expected
+ }
+
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setInvertedIndexColumns(Arrays.asList("myCol2")).build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid Inverted Index column name");
+ } catch (Exception e) {
+ // expected
+ }
+
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setNoDictionaryColumns(Arrays.asList("myCol2")).build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid No Dictionary column name");
+ } catch (Exception e) {
+ // expected
+ }
+
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setOnHeapDictionaryColumns(Arrays.asList("myCol2")).build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid On Heap Dictionary column name");
+ } catch (Exception e) {
+ // expected
+ }
+
+ tableConfig =
+ new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setRangeIndexColumns(Arrays.asList("myCol2"))
+ .build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid Range Index column name");
+ } catch (Exception e) {
+ // expected
+ }
+
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setSortedColumn("myCol2").build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid Sorted column name");
+ } catch (Exception e) {
+ // expected
+ }
+
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setVarLengthDictionaryColumns(Arrays.asList("myCol2")).build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid Var Length Dictionary column name");
+ } catch (Exception e) {
+ // expected
+ }
+
+ // Although this config makes no sense, it should pass the validation phase
+ StarTreeIndexConfig starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol"),
+ Arrays.asList("myCol"),
+ Arrays.asList("SUM__myCol"),
+ 1);
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig))
+ .build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ } catch (Exception e) {
+ Assert.fail("Should fail for valid StarTreeIndex config column name");
+ // expected
+ }
+
+ starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol2"),
+ Arrays.asList("myCol"),
+ Arrays.asList("SUM__myCol"),
+ 1);
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig))
+ .build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid StarTreeIndex config column name in dimension split order");
+ } catch (Exception e) {
+ // expected
+ }
+
+ starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol"),
+ Arrays.asList("myCol2"),
+ Arrays.asList("SUM__myCol"),
+ 1);
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig))
+ .build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid StarTreeIndex config column name in skip star node for dimension");
+ } catch (Exception e) {
+ // expected
+ }
+
+ starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol"),
+ Arrays.asList("myCol"),
+ Arrays.asList("SUM__myCol2"),
+ 1);
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig))
+ .build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid StarTreeIndex config column name in function column pair");
+ } catch (Exception e) {
+ // expected
+ }
+
+ FieldConfig fieldConfig = new FieldConfig("myCol2", null, null, null);
+ tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME)
+ .setFieldConfigList(Arrays.asList(fieldConfig)).build();
+ try {
+ TableConfigUtils.validate(tableConfig, schema);
+ Assert.fail("Should fail for invalid column name in Field Config List");
+ } catch (Exception e) {
+ // expected
+ }
+ }
}
diff --git a/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema b/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema
index ba8d12f..e7e1776 100644
--- a/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema
+++ b/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema
@@ -253,7 +253,7 @@
],
"metricFieldSpecs": [
{
- "name": "ArrDel15",
+ "name": "ActualElapsedTime",
"dataType": "INT"
},
{
@@ -316,4 +316,4 @@
"timeType": "DAYS"
}
}
-}
\ No newline at end of file
+}
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java
index 3891bf9..0e32a02 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java
@@ -32,6 +32,7 @@ import org.apache.pinot.spi.config.table.ReplicaGroupStrategyConfig;
import org.apache.pinot.spi.config.table.RoutingConfig;
import org.apache.pinot.spi.config.table.SegmentPartitionConfig;
import org.apache.pinot.spi.config.table.SegmentsValidationAndRetentionConfig;
+import org.apache.pinot.spi.config.table.StarTreeIndexConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableCustomConfig;
import org.apache.pinot.spi.config.table.TableTaskConfig;
@@ -89,6 +90,8 @@ public class TableConfigBuilder {
private Map<String, String> _streamConfigs;
private SegmentPartitionConfig _segmentPartitionConfig;
private boolean _nullHandlingEnabled;
+ private List<String> _varLengthDictionaryColumns;
+ private List<StarTreeIndexConfig> _starTreeIndexConfigs;
private TableCustomConfig _customConfig;
private QuotaConfig _quotaConfig;
@@ -247,6 +250,16 @@ public class TableConfigBuilder {
return this;
}
+ public TableConfigBuilder setVarLengthDictionaryColumns(List<String> varLengthDictionaryColumns) {
+ _varLengthDictionaryColumns = varLengthDictionaryColumns;
+ return this;
+ }
+
+ public TableConfigBuilder setStarTreeIndexConfigs(List<StarTreeIndexConfig> starTreeIndexConfigs) {
+ _starTreeIndexConfigs = starTreeIndexConfigs;
+ return this;
+ }
+
public TableConfigBuilder setStreamConfigs(Map<String, String> streamConfigs) {
Preconditions.checkState(_tableType == TableType.REALTIME);
_streamConfigs = streamConfigs;
@@ -358,6 +371,8 @@ public class TableConfigBuilder {
indexingConfig.setStreamConfigs(_streamConfigs);
indexingConfig.setSegmentPartitionConfig(_segmentPartitionConfig);
indexingConfig.setNullHandlingEnabled(_nullHandlingEnabled);
+ indexingConfig.setVarLengthDictionaryColumns(_varLengthDictionaryColumns);
+ indexingConfig.setStarTreeIndexConfigs(_starTreeIndexConfigs);
if (_customConfig == null) {
_customConfig = new TableCustomConfig(null);
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org