You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2020/09/13 22:17:30 UTC
[GitHub] [incubator-pinot] Jackie-Jiang commented on a change in pull request #6009: Adjust schema validation logic in AvroIngestionSchemaValidator
Jackie-Jiang commented on a change in pull request #6009:
URL: https://github.com/apache/incubator-pinot/pull/6009#discussion_r487583315
##########
File path: pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java
##########
@@ -111,36 +112,56 @@ private void validateSchemas() {
}
}
if (nonNullSchema != null) {
+ avroColumnSchema = nonNullSchema;
avroColumnType = nonNullSchema.getType();
}
}
- if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
- _dataTypeMismatch.addMismatchReason(String
- .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
- fieldSpec.getDataType().name(), avroColumnSchema.getName(), avroColumnType.toString(),
- getInputSchemaType()));
- }
-
if (fieldSpec.isSingleValueField()) {
+ // check data type mismatch
+ if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
Review comment:
(nit)
```suggestion
if (!fieldSpec.getDataType().name().equals(avroColumnType.name())) {
```
##########
File path: pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java
##########
@@ -111,36 +112,56 @@ private void validateSchemas() {
}
}
if (nonNullSchema != null) {
+ avroColumnSchema = nonNullSchema;
avroColumnType = nonNullSchema.getType();
}
}
- if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
- _dataTypeMismatch.addMismatchReason(String
- .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
- fieldSpec.getDataType().name(), avroColumnSchema.getName(), avroColumnType.toString(),
- getInputSchemaType()));
- }
-
if (fieldSpec.isSingleValueField()) {
+ // check data type mismatch
+ if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
+ getDataTypeMismatchResult().addMismatchReason(String
+ .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
+ fieldSpec.getDataType().name(), avroColumnName, avroColumnType.toString(),
+ getInputSchemaType()));
+ }
+ // check single-value multi-value mismatch
if (avroColumnType.ordinal() < org.apache.avro.Schema.Type.STRING.ordinal()) {
Review comment:
You might want to check the single-value/multi-value mismatch first, and then check the data type based on whether they match; otherwise you will always get a data type mismatch whenever the single-value/multi-value check does not match.
##########
File path: pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java
##########
@@ -111,36 +112,56 @@ private void validateSchemas() {
}
}
if (nonNullSchema != null) {
+ avroColumnSchema = nonNullSchema;
avroColumnType = nonNullSchema.getType();
}
}
- if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
- _dataTypeMismatch.addMismatchReason(String
- .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
- fieldSpec.getDataType().name(), avroColumnSchema.getName(), avroColumnType.toString(),
- getInputSchemaType()));
- }
-
if (fieldSpec.isSingleValueField()) {
+ // check data type mismatch
+ if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
+ getDataTypeMismatchResult().addMismatchReason(String
+ .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
+ fieldSpec.getDataType().name(), avroColumnName, avroColumnType.toString(),
Review comment:
```suggestion
fieldSpec.getDataType().name(), avroColumnName, avroColumnType.name(),
```
##########
File path: pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java
##########
@@ -111,36 +112,56 @@ private void validateSchemas() {
}
}
if (nonNullSchema != null) {
+ avroColumnSchema = nonNullSchema;
avroColumnType = nonNullSchema.getType();
}
}
- if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
- _dataTypeMismatch.addMismatchReason(String
- .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
- fieldSpec.getDataType().name(), avroColumnSchema.getName(), avroColumnType.toString(),
- getInputSchemaType()));
- }
-
if (fieldSpec.isSingleValueField()) {
+ // check data type mismatch
+ if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
+ getDataTypeMismatchResult().addMismatchReason(String
Review comment:
(nit) Directly use the member variable? Same for the other places.
```suggestion
_dataTypeMismatch.addMismatchReason(String
```
##########
File path: pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java
##########
@@ -111,36 +112,56 @@ private void validateSchemas() {
}
}
if (nonNullSchema != null) {
+ avroColumnSchema = nonNullSchema;
avroColumnType = nonNullSchema.getType();
}
}
- if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
- _dataTypeMismatch.addMismatchReason(String
- .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
- fieldSpec.getDataType().name(), avroColumnSchema.getName(), avroColumnType.toString(),
- getInputSchemaType()));
- }
-
if (fieldSpec.isSingleValueField()) {
+ // check data type mismatch
+ if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
+ getDataTypeMismatchResult().addMismatchReason(String
+ .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
+ fieldSpec.getDataType().name(), avroColumnName, avroColumnType.toString(),
+ getInputSchemaType()));
+ }
+ // check single-value multi-value mismatch
if (avroColumnType.ordinal() < org.apache.avro.Schema.Type.STRING.ordinal()) {
// the column is a complex structure
- _singleValueMultiValueFieldMismatch.addMismatchReason(String.format(
- "The Pinot column: %s is 'single-value' column but the column: %s from input %s is 'multi-value' column.",
- columnName, avroColumnSchema.getName(), getInputSchemaType()));
+ getSingleValueMultiValueFieldMismatchResult().addMismatchReason(String
+ .format(
+ "The Pinot column: %s is 'single-value' column but the column: %s from input %s is 'multi-value' column.",
+ columnName, avroColumnName, getInputSchemaType()));
}
} else {
+ // check data type mismatch
+ FieldSpec.DataType dataTypeForMVColumn = AvroUtils.extractFieldDataType(avroColumnField);
+ if (fieldSpec.getDataType() != dataTypeForMVColumn) {
+ getDataTypeMismatchResult().addMismatchReason(String
+ .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.",
+ columnName, fieldSpec.getDataType().name(), avroColumnName, dataTypeForMVColumn.name(),
+ getInputSchemaType()));
+ }
+ // check single-value multi-value mismatch
if (avroColumnType.ordinal() >= org.apache.avro.Schema.Type.STRING.ordinal()) {
// the column is a complex structure
- _singleValueMultiValueFieldMismatch.addMismatchReason(String.format(
- "The Pinot column: %s is 'multi-value' column but the column: %s from input %s schema is 'single-value' column.",
- columnName, avroColumnSchema.getName(), getInputSchemaType()));
+ getSingleValueMultiValueFieldMismatchResult().addMismatchReason(String
+ .format(
+ "The Pinot column: %s is 'multi-value' column but the column: %s from input %s schema is 'single-value' column.",
+ columnName, avroColumnName, getInputSchemaType()));
}
+ // check multi-value column structure mismatch
if (avroColumnType != org.apache.avro.Schema.Type.ARRAY) {
// multi-value column should use array structure for now.
- _multiValueStructureMismatch.addMismatchReason(String.format(
+ getMultiValueStructureMismatchResult().addMismatchReason(String.format(
"The Pinot column: %s is 'multi-value' column but the column: %s from input %s schema is of '%s' type, which should have been of 'array' type.",
- columnName, avroColumnSchema.getName(), getInputSchemaType(), avroColumnType.getName()));
+ columnName, avroColumnName, getInputSchemaType(), avroColumnType.getName()));
+ } else if (avroColumnSchema.getElementType().getType().ordinal() < org.apache.avro.Schema.Type.STRING
Review comment:
(nit) cache `avroColumnSchema.getElementType().getType()` in a local variable?
##########
File path: pinot-plugins/pinot-input-format/pinot-avro-base/src/main/java/org/apache/pinot/plugin/inputformat/avro/AvroIngestionSchemaValidator.java
##########
@@ -111,36 +112,56 @@ private void validateSchemas() {
}
}
if (nonNullSchema != null) {
+ avroColumnSchema = nonNullSchema;
avroColumnType = nonNullSchema.getType();
}
}
- if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
- _dataTypeMismatch.addMismatchReason(String
- .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
- fieldSpec.getDataType().name(), avroColumnSchema.getName(), avroColumnType.toString(),
- getInputSchemaType()));
- }
-
if (fieldSpec.isSingleValueField()) {
+ // check data type mismatch
+ if (!fieldSpec.getDataType().name().equalsIgnoreCase(avroColumnType.toString())) {
+ getDataTypeMismatchResult().addMismatchReason(String
+ .format("The Pinot column: (%s: %s) doesn't match with the column (%s: %s) in input %s schema.", columnName,
+ fieldSpec.getDataType().name(), avroColumnName, avroColumnType.toString(),
+ getInputSchemaType()));
+ }
+ // check single-value multi-value mismatch
if (avroColumnType.ordinal() < org.apache.avro.Schema.Type.STRING.ordinal()) {
// the column is a complex structure
- _singleValueMultiValueFieldMismatch.addMismatchReason(String.format(
- "The Pinot column: %s is 'single-value' column but the column: %s from input %s is 'multi-value' column.",
- columnName, avroColumnSchema.getName(), getInputSchemaType()));
+ getSingleValueMultiValueFieldMismatchResult().addMismatchReason(String
+ .format(
+ "The Pinot column: %s is 'single-value' column but the column: %s from input %s is 'multi-value' column.",
+ columnName, avroColumnName, getInputSchemaType()));
}
} else {
+ // check data type mismatch
Review comment:
Same here: check the single-value/multi-value (SV/MV) mismatch first, then check the data type based on that result.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org