You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by si...@apache.org on 2022/01/08 00:40:56 UTC
[pinot] branch master updated: Fix ByteArray datatype column metadata getMaxValue NPE bug and expose maxNumMultiValues (#7918)
This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 06888d8 Fix ByteArray datatype column metadata getMaxValue NPE bug and expose maxNumMultiValues (#7918)
06888d8 is described below
commit 06888d8b60598b661ae90df7e7a2cc628460191a
Author: Liang Mingqiang <mi...@linkedin.com>
AuthorDate: Fri Jan 7 16:40:36 2022 -0800
Fix ByteArray datatype column metadata getMaxValue NPE bug and expose maxNumMultiValues (#7918)
---
.../restlet/resources/TableMetadataInfo.java | 9 +-
.../util/ServerSegmentMetadataReader.java | 11 +-
.../tests/OfflineClusterIntegrationTest.java | 117 ++++++++++++++++-----
.../pinot/server/api/resources/TablesResource.java | 22 ++--
4 files changed, 118 insertions(+), 41 deletions(-)
diff --git a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java
index c2c62d6..81d86b6 100644
--- a/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java
+++ b/pinot-common/src/main/java/org/apache/pinot/common/restlet/resources/TableMetadataInfo.java
@@ -41,18 +41,21 @@ public class TableMetadataInfo {
private final long _numRows;
private final Map<String, Double> _columnLengthMap;
private final Map<String, Double> _columnCardinalityMap;
+ private final Map<String, Double> _maxNumMultiValuesMap;
@JsonCreator
public TableMetadataInfo(@JsonProperty("tableName") String tableName,
@JsonProperty("diskSizeInBytes") long sizeInBytes, @JsonProperty("numSegments") long numSegments,
@JsonProperty("numRows") long numRows, @JsonProperty("columnLengthMap") Map<String, Double> columnLengthMap,
- @JsonProperty("columnCardinalityMap") Map<String, Double> columnCardinalityMap) {
+ @JsonProperty("columnCardinalityMap") Map<String, Double> columnCardinalityMap,
+ @JsonProperty("maxNumMultiValuesMap") Map<String, Double> maxNumMultiValuesMap) {
_tableName = tableName;
_diskSizeInBytes = sizeInBytes;
_numSegments = numSegments;
_numRows = numRows;
_columnLengthMap = columnLengthMap;
_columnCardinalityMap = columnCardinalityMap;
+ _maxNumMultiValuesMap = maxNumMultiValuesMap;
}
public String getTableName() {
@@ -78,4 +81,8 @@ public class TableMetadataInfo {
public Map<String, Double> getColumnCardinalityMap() {
return _columnCardinalityMap;
}
+
+ public Map<String, Double> getMaxNumMultiValuesMap() {
+ return _maxNumMultiValuesMap;
+ }
}
diff --git a/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java b/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java
index 5fb8642..4ee509a 100644
--- a/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java
+++ b/pinot-controller/src/main/java/org/apache/pinot/controller/util/ServerSegmentMetadataReader.java
@@ -87,6 +87,7 @@ public class ServerSegmentMetadataReader {
int failedParses = 0;
final Map<String, Double> columnLengthMap = new HashMap<>();
final Map<String, Double> columnCardinalityMap = new HashMap<>();
+ final Map<String, Double> maxNumMultiValuesMap = new HashMap<>();
for (Map.Entry<String, String> streamResponse : serviceResponse._httpResponses.entrySet()) {
try {
TableMetadataInfo tableMetadataInfo =
@@ -96,6 +97,7 @@ public class ServerSegmentMetadataReader {
totalNumSegments += tableMetadataInfo.getNumSegments();
tableMetadataInfo.getColumnLengthMap().forEach((k, v) -> columnLengthMap.merge(k, v, Double::sum));
tableMetadataInfo.getColumnCardinalityMap().forEach((k, v) -> columnCardinalityMap.merge(k, v, Double::sum));
+ tableMetadataInfo.getMaxNumMultiValuesMap().forEach((k, v) -> maxNumMultiValuesMap.merge(k, v, Double::sum));
} catch (IOException e) {
failedParses++;
LOGGER.error("Unable to parse server {} response due to an error: ", streamResponse.getKey(), e);
@@ -104,17 +106,18 @@ public class ServerSegmentMetadataReader {
int finalTotalNumSegments = totalNumSegments;
columnLengthMap.replaceAll((k, v) -> v / finalTotalNumSegments);
columnCardinalityMap.replaceAll((k, v) -> v / finalTotalNumSegments);
+ maxNumMultiValuesMap.replaceAll((k, v) -> v / finalTotalNumSegments);
// Since table segments may have multiple replicas, divide diskSizeInBytes, numRows and numSegments by numReplica
- // to avoid double counting, for columnAvgLengthMap and columnAvgCardinalityMap, dividing by numReplica is not
- // needed since totalNumSegments already contains replicas.
+ // to avoid double counting, for columnAvgLengthMap, columnAvgCardinalityMap and maxNumMultiValuesMap, dividing by
+ // numReplica is not needed since totalNumSegments already contains replicas.
totalDiskSizeInBytes /= numReplica;
totalNumSegments /= numReplica;
totalNumRows /= numReplica;
TableMetadataInfo aggregateTableMetadataInfo =
- new TableMetadataInfo("", totalDiskSizeInBytes, totalNumSegments, totalNumRows, columnLengthMap,
- columnCardinalityMap);
+ new TableMetadataInfo(tableNameWithType, totalDiskSizeInBytes, totalNumSegments, totalNumRows, columnLengthMap,
+ columnCardinalityMap, maxNumMultiValuesMap);
if (failedParses != 0) {
LOGGER.warn("Failed to parse {} / {} aggregated segment metadata responses from servers.", failedParses,
serverUrls.size());
diff --git a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
index 519675c..5a41f91 100644
--- a/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
+++ b/pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/OfflineClusterIntegrationTest.java
@@ -123,6 +123,7 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
private static final String NUM_ROWS_KEY = "numRows";
private static final String COLUMN_LENGTH_MAP_KEY = "columnLengthMap";
private static final String COLUMN_CARDINALITY_MAP_KEY = "columnCardinalityMap";
+ private static final String MAX_NUM_MULTI_VALUES_MAP_KEY = "maxNumMultiValuesMap";
// TODO: This might lead to flaky test, as this disk size is not deterministic
// as it depends on the iteration order of a HashSet.
private static final int DISK_SIZE_IN_BYTES = 20796000;
@@ -1832,21 +1833,63 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
@Test
public void testAggregateMetadataAPI()
throws IOException {
- JsonNode oneColumnResponse = JsonUtils.stringToJsonNode(
- sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=DestCityMarketID"));
- assertEquals(oneColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
- assertEquals(oneColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
- assertEquals(oneColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
- assertEquals(oneColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 1);
- assertEquals(oneColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 1);
-
- JsonNode threeColumnsResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl
+ JsonNode oneSVColumnResponse = JsonUtils
+ .stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=DestCityMarketID"));
+ assertEquals(oneSVColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
+ assertEquals(oneSVColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
+ assertEquals(oneSVColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
+ assertEquals(oneSVColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 1);
+ assertEquals(oneSVColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 1);
+ // DestCityMarketID is a SV column
+ assertEquals(oneSVColumnResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0);
+
+ JsonNode oneMVColumnResponse = JsonUtils
+ .stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=DivLongestGTimes"));
+ assertEquals(oneMVColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
+ assertEquals(oneMVColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
+ assertEquals(oneMVColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
+ assertEquals(oneMVColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 1);
+ assertEquals(oneMVColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 1);
+ // DivLongestGTimes is a MV column
+ assertEquals(oneMVColumnResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 1);
+
+ JsonNode threeSVColumnsResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl
+ "/tables/mytable/metadata?columns=DivActualElapsedTime&columns=CRSElapsedTime&columns=OriginStateName"));
- assertEquals(threeColumnsResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
- assertEquals(threeColumnsResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
- assertEquals(threeColumnsResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
- assertEquals(threeColumnsResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3);
- assertEquals(threeColumnsResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3);
+ assertEquals(threeSVColumnsResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
+ assertEquals(threeSVColumnsResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
+ assertEquals(threeSVColumnsResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
+ assertEquals(threeSVColumnsResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3);
+ assertEquals(threeSVColumnsResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3);
+ assertEquals(threeSVColumnsResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0);
+
+ JsonNode threeSVColumnsWholeEncodedResponse = JsonUtils.stringToJsonNode(sendGetRequest(
+ _controllerBaseApiUrl + "/tables/mytable/metadata?columns="
+ + "DivActualElapsedTime%26columns%3DCRSElapsedTime%26columns%3DOriginStateName"));
+ assertEquals(threeSVColumnsWholeEncodedResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
+ assertEquals(threeSVColumnsWholeEncodedResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
+ assertEquals(threeSVColumnsWholeEncodedResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
+ assertEquals(threeSVColumnsWholeEncodedResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3);
+ assertEquals(threeSVColumnsWholeEncodedResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3);
+ assertEquals(threeSVColumnsWholeEncodedResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0);
+
+ JsonNode threeMVColumnsResponse = JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl
+ + "/tables/mytable/metadata?columns=DivLongestGTimes&columns=DivWheelsOns&columns=DivAirports"));
+ assertEquals(threeMVColumnsResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
+ assertEquals(threeMVColumnsResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
+ assertEquals(threeMVColumnsResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
+ assertEquals(threeMVColumnsResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3);
+ assertEquals(threeMVColumnsResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3);
+ assertEquals(threeMVColumnsResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 3);
+
+ JsonNode threeMVColumnsWholeEncodedResponse = JsonUtils.stringToJsonNode(sendGetRequest(
+ _controllerBaseApiUrl + "/tables/mytable/metadata?columns="
+ + "DivLongestGTimes%26columns%3DDivWheelsOns%26columns%3DDivAirports"));
+ assertEquals(threeMVColumnsWholeEncodedResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
+ assertEquals(threeMVColumnsWholeEncodedResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
+ assertEquals(threeMVColumnsWholeEncodedResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
+ assertEquals(threeMVColumnsWholeEncodedResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 3);
+ assertEquals(threeMVColumnsWholeEncodedResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 3);
+ assertEquals(threeMVColumnsWholeEncodedResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 3);
JsonNode zeroColumnResponse =
JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata"));
@@ -1855,21 +1898,39 @@ public class OfflineClusterIntegrationTest extends BaseClusterIntegrationTestSet
assertEquals(zeroColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
assertEquals(zeroColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 0);
assertEquals(zeroColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 0);
+ assertEquals(zeroColumnResponse.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 0);
- JsonNode allColumnResponse =
+ JsonNode starColumnResponse =
JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=*"));
- assertEquals(allColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
- assertEquals(allColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
- assertEquals(allColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
- assertEquals(allColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 82);
- assertEquals(allColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 82);
-
- allColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest(
- _controllerBaseApiUrl + "/tables/mytable/metadata?columns=CRSElapsedTime&columns=*&columns=OriginStateName"));
- assertEquals(allColumnResponse.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
- assertEquals(allColumnResponse.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
- assertEquals(allColumnResponse.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
- assertEquals(allColumnResponse.get(COLUMN_LENGTH_MAP_KEY).size(), 82);
- assertEquals(allColumnResponse.get(COLUMN_CARDINALITY_MAP_KEY).size(), 82);
+ validateAllColumnsResponse(starColumnResponse);
+
+ JsonNode starEncodedColumnResponse =
+ JsonUtils.stringToJsonNode(sendGetRequest(_controllerBaseApiUrl + "/tables/mytable/metadata?columns=%2A"));
+ validateAllColumnsResponse(starEncodedColumnResponse);
+
+ JsonNode starWithExtraColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest(
+ _controllerBaseApiUrl + "/tables/mytable/metadata?columns="
+ + "CRSElapsedTime&columns=*&columns=OriginStateName"));
+ validateAllColumnsResponse(starWithExtraColumnResponse);
+
+ JsonNode starWithExtraEncodedColumnResponse = JsonUtils.stringToJsonNode(sendGetRequest(
+ _controllerBaseApiUrl + "/tables/mytable/metadata?columns="
+ + "CRSElapsedTime&columns=%2A&columns=OriginStateName"));
+ validateAllColumnsResponse(starWithExtraEncodedColumnResponse);
+
+ JsonNode starWithExtraColumnWholeEncodedResponse = JsonUtils.stringToJsonNode(sendGetRequest(
+ _controllerBaseApiUrl + "/tables/mytable/metadata?columns="
+ + "CRSElapsedTime%26columns%3D%2A%26columns%3DOriginStateName"));
+ validateAllColumnsResponse(starWithExtraColumnWholeEncodedResponse);
+ }
+
+ private void validateAllColumnsResponse(JsonNode response) {
+ assertEquals(response.get(DISK_SIZE_IN_BYTES_KEY).asInt(), DISK_SIZE_IN_BYTES);
+ assertEquals(response.get(NUM_SEGMENTS_KEY).asInt(), NUM_SEGMENTS);
+ assertEquals(response.get(NUM_ROWS_KEY).asInt(), NUM_ROWS);
+ // mytable has 82 columns, among them 9 of the columns are MV columns.
+ assertEquals(response.get(COLUMN_LENGTH_MAP_KEY).size(), 82);
+ assertEquals(response.get(COLUMN_CARDINALITY_MAP_KEY).size(), 82);
+ assertEquals(response.get(MAX_NUM_MULTI_VALUES_MAP_KEY).size(), 9);
}
}
diff --git a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java
index 693cabc..ebf3346 100644
--- a/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java
+++ b/pinot-server/src/main/java/org/apache/pinot/server/api/resources/TablesResource.java
@@ -184,6 +184,7 @@ public class TablesResource {
long totalNumRows = 0;
Map<String, Double> columnLengthMap = new HashMap<>();
Map<String, Double> columnCardinalityMap = new HashMap<>();
+ Map<String, Double> maxNumMultiValuesMap = new HashMap<>();
try {
for (SegmentDataManager segmentDataManager : segmentDataManagers) {
if (segmentDataManager instanceof ImmutableSegmentDataManager) {
@@ -202,7 +203,7 @@ public class TablesResource {
}
for (String column : columnSet) {
ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataMap().get(column);
- int columnLength;
+ int columnLength = 0;
DataType storedDataType = columnMetadata.getDataType().getStoredType();
if (storedDataType.isFixedWidth()) {
// For type of fixed width: INT, LONG, FLOAT, DOUBLE, BOOLEAN (stored as INT), TIMESTAMP (stored as LONG),
@@ -210,21 +211,26 @@ public class TablesResource {
columnLength = storedDataType.size();
} else if (columnMetadata.hasDictionary()) {
// For type of variable width (String, Bytes), if it's stored using dictionary encoding, set the
- // columnLength as the max
- // length in dictionary.
+ // columnLength as the max length in dictionary.
columnLength = columnMetadata.getColumnMaxLength();
} else if (storedDataType == DataType.STRING || storedDataType == DataType.BYTES) {
// For type of variable width (String, Bytes), if it's stored using raw bytes, set the columnLength as
- // the length
- // of the max value.
- columnLength = ((String) columnMetadata.getMaxValue()).getBytes(StandardCharsets.UTF_8).length;
+ // the length of the max value.
+ if (columnMetadata.getMaxValue() != null) {
+ String maxValueString = (String) columnMetadata.getMaxValue();
+ columnLength = maxValueString.getBytes(StandardCharsets.UTF_8).length;
+ }
} else {
// For type of STRUCT, MAP, LIST, set the columnLength as DEFAULT_MAX_LENGTH (512).
columnLength = FieldSpec.DEFAULT_MAX_LENGTH;
}
- int columnCardinality = segmentMetadata.getColumnMetadataMap().get(column).getCardinality();
+ int columnCardinality = columnMetadata.getCardinality();
columnLengthMap.merge(column, (double) columnLength, Double::sum);
columnCardinalityMap.merge(column, (double) columnCardinality, Double::sum);
+ if (!columnMetadata.isSingleValue()) {
+ int maxNumMultiValues = columnMetadata.getMaxNumberOfMultiValues();
+ maxNumMultiValuesMap.merge(column, (double) maxNumMultiValues, Double::sum);
+ }
}
}
}
@@ -239,7 +245,7 @@ public class TablesResource {
TableMetadataInfo tableMetadataInfo =
new TableMetadataInfo(tableDataManager.getTableName(), totalSegmentSizeBytes, segmentDataManagers.size(),
- totalNumRows, columnLengthMap, columnCardinalityMap);
+ totalNumRows, columnLengthMap, columnCardinalityMap, maxNumMultiValuesMap);
return ResourceUtils.convertToJsonString(tableMetadataInfo);
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org