You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by sn...@apache.org on 2023/11/27 19:40:06 UTC
(pinot) 03/06: Revert "added code to remove NaN from multivalued columns and modified order of transformers."
This is an automated email from the ASF dual-hosted git repository.
snlee pushed a commit to branch revert-12032-santizeNanNegativeZeroBehaviour
in repository https://gitbox.apache.org/repos/asf/pinot.git
commit f51e685a4972666866785f41d6d1a088be58be34
Author: Seunghyun Lee <sn...@apache.org>
AuthorDate: Mon Nov 27 11:39:57 2023 -0800
Revert "added code to remove NaN from multivalued columns and modified order of transformers."
This reverts commit 06e91c13a11c5a07939adf326de28e95e39f4596.
---
.../recordtransformer/CompositeTransformer.java | 7 +++----
.../recordtransformer/SpecialValueTransformer.java | 18 ++++++----------
.../recordtransformer/RecordTransformerTest.java | 24 +++++++++-------------
3 files changed, 19 insertions(+), 30 deletions(-)
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
index 0ba394e9c9..50ca2a97c1 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
@@ -70,16 +70,15 @@ public class CompositeTransformer implements RecordTransformer {
* </li>
* <li>
* {@link SpecialValueTransformer} after {@link DataTypeTransformer} so that we already have the values complying
- * with the schema before handling special values and before {@link NullValueTransformer} so that it transforms
- * all the null values properly
+ * with the schema before handling special values
* </li>
* </ul>
*/
public static List<RecordTransformer> getDefaultTransformers(TableConfig tableConfig, Schema schema) {
return Stream.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig),
new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema),
- new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema),
- new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp())
+ new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema),
+ new SpecialValueTransformer(schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp())
.collect(Collectors.toList());
}
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
index 26df94933a..4cfbbc8ce9 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
@@ -18,9 +18,7 @@
*/
package org.apache.pinot.segment.local.recordtransformer;
-import java.util.ArrayList;
import java.util.HashSet;
-import java.util.List;
import org.apache.pinot.spi.data.FieldSpec;
import org.apache.pinot.spi.data.FieldSpec.DataType;
import org.apache.pinot.spi.data.Schema;
@@ -67,10 +65,10 @@ public class SpecialValueTransformer implements RecordTransformer {
private Object transformNaN(Object value) {
if ((value instanceof Float) && ((Float) value).isNaN()) {
LOGGER.info("Float.NaN detected, converting to default null.");
- value = null;
+ value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT;
} else if ((value instanceof Double) && ((Double) value).isNaN()) {
LOGGER.info("Double.NaN detected, converting to default null.");
- value = null;
+ value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE;
}
return value;
}
@@ -88,16 +86,12 @@ public class SpecialValueTransformer implements RecordTransformer {
// Multi-valued column.
Object[] values = (Object[]) value;
int numValues = values.length;
- List<Object> negativeZeroNanSanitizedValues = new ArrayList<>(numValues);
- int numberOfElements = values.length;
- for (Object o : values) {
- Object zeroTransformedValue = transformNegativeZero(o);
- Object nanTransformedValue = transformNaN(zeroTransformedValue);
- if (nanTransformedValue != null) {
- negativeZeroNanSanitizedValues.add(nanTransformedValue);
+ for (int i = 0; i < numValues; i++) {
+ if (values[i] != null) {
+ values[i] = transformNegativeZero(values[i]);
+ values[i] = transformNaN(values[i]);
}
}
- record.putValue(element,negativeZeroNanSanitizedValues.toArray());
} else {
// Single-valued column.
Object zeroTransformedValue = transformNegativeZero(value);
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
index 2685f8b6da..6361963785 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
@@ -19,8 +19,6 @@
package org.apache.pinot.segment.local.recordtransformer;
import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -288,12 +286,12 @@ public class RecordTransformerTest {
Double.doubleToRawLongBits(0.0d));
assertEquals(record.getValue("mvFloatNegativeZero"), new Float[]{0.0f, 1.0f, 0.0f, 3.0f});
assertEquals(record.getValue("mvDoubleNegativeZero"), new Double[]{0.0d, 1.0d, 0.0d, 3.0d});
- assertNull(record.getValue("svFloatNaN"));
- assertNull(record.getValue("svDoubleNaN"));
+ assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT);
+ assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE);
assertEquals(record.getValue("mvFloatNaN"),
- new Float[]{0.0f, 2.0f});
+ new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f});
assertEquals(record.getValue("mvDoubleNaN"),
- new Double[]{0.0d, 2.0d});
+ new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d});
}
}
@@ -304,8 +302,7 @@ public class RecordTransformerTest {
// Build Schema and ingestionConfig in such a way that all the transformers are loaded.
Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.INT)
.addSingleValueDimension("svDouble", DataType.DOUBLE)
- .addSingleValueDimension("expressionTestColumn", DataType.INT)
- .addSingleValueDimension("svNaN", DataType.FLOAT).addMultiValueDimension("mvNaN",DataType.FLOAT)
+ .addSingleValueDimension("expressionTestColumn", DataType.INT).addSingleValueDimension("svNaN", DataType.FLOAT)
.addSingleValueDimension("emptyDimensionForNullValueTransformer", DataType.FLOAT)
.addSingleValueDimension("svStringWithNullCharacters", DataType.STRING)
.addSingleValueDimension("indexableExtras", DataType.JSON)
@@ -330,8 +327,8 @@ public class RecordTransformerTest {
List<RecordTransformer> expectedListOfTransformers =
List.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig),
new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema),
- new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema),
- new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema));
+ new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema),
+ new SpecialValueTransformer(schema), new SanitizationTransformer(schema));
// Check that the number of current transformers match the expected number of transformers.
assertEquals(currentListOfTransformers.size(), NUMBER_OF_TRANSFORMERS);
@@ -352,7 +349,6 @@ public class RecordTransformerTest {
// Data for SpecialValue Transformer.
record.putValue("svNaN", Float.NaN);
- record.putValue("mvNaN",new Float[]{1.0f,Float.NaN,2.0f});
// Data for sanitization transformer.
record.putValue("svStringWithNullCharacters", "1\0002\0003");
@@ -649,10 +645,10 @@ public class RecordTransformerTest {
assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT);
assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE);
assertEquals(record.getValue("mvFloatNaN"),
- new Float[]{0.0f, 2.0f});
+ new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f});
assertEquals(record.getValue("mvDoubleNaN"),
- new Double[]{0.0d, 2.0d});
- assertEquals(new ArrayList<>(record.getNullValueFields()), new ArrayList<>(Arrays.asList("svFloatNaN","svDoubleNaN")));
+ new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d});
+ assertTrue(record.getNullValueFields().isEmpty());
}
// Test empty record
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org