You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by sn...@apache.org on 2023/11/27 19:40:06 UTC

(pinot) 03/06: Revert "added code to remove NaN from multivalued columns and modified order of transformers."

This is an automated email from the ASF dual-hosted git repository.

snlee pushed a commit to branch revert-12032-santizeNanNegativeZeroBehaviour
in repository https://gitbox.apache.org/repos/asf/pinot.git

commit f51e685a4972666866785f41d6d1a088be58be34
Author: Seunghyun Lee <sn...@apache.org>
AuthorDate: Mon Nov 27 11:39:57 2023 -0800

    Revert "added code to remove NaN from multivalued columns and modified order of transformers."
    
    This reverts commit 06e91c13a11c5a07939adf326de28e95e39f4596.
---
 .../recordtransformer/CompositeTransformer.java    |  7 +++----
 .../recordtransformer/SpecialValueTransformer.java | 18 ++++++----------
 .../recordtransformer/RecordTransformerTest.java   | 24 +++++++++-------------
 3 files changed, 19 insertions(+), 30 deletions(-)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
index 0ba394e9c9..50ca2a97c1 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/CompositeTransformer.java
@@ -70,16 +70,15 @@ public class CompositeTransformer implements RecordTransformer {
    *   </li>
    *   <li>
    *     {@link SpecialValueTransformer} after {@link DataTypeTransformer} so that we already have the values complying
-   *      with the schema before handling special values and before {@link NullValueTransformer} so that it transforms
-   *      all the null values properly
+   *      with the schema before handling special values
    *   </li>
    * </ul>
    */
   public static List<RecordTransformer> getDefaultTransformers(TableConfig tableConfig, Schema schema) {
     return Stream.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig),
             new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema),
-            new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema),
-            new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp())
+            new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema),
+            new SpecialValueTransformer(schema), new SanitizationTransformer(schema)).filter(t -> !t.isNoOp())
         .collect(Collectors.toList());
   }
 
diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
index 26df94933a..4cfbbc8ce9 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/recordtransformer/SpecialValueTransformer.java
@@ -18,9 +18,7 @@
  */
 package org.apache.pinot.segment.local.recordtransformer;
 
-import java.util.ArrayList;
 import java.util.HashSet;
-import java.util.List;
 import org.apache.pinot.spi.data.FieldSpec;
 import org.apache.pinot.spi.data.FieldSpec.DataType;
 import org.apache.pinot.spi.data.Schema;
@@ -67,10 +65,10 @@ public class SpecialValueTransformer implements RecordTransformer {
   private Object transformNaN(Object value) {
     if ((value instanceof Float) && ((Float) value).isNaN()) {
       LOGGER.info("Float.NaN detected, converting to default null.");
-      value = null;
+      value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT;
     } else if ((value instanceof Double) && ((Double) value).isNaN()) {
       LOGGER.info("Double.NaN detected, converting to default null.");
-      value = null;
+      value = FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE;
     }
     return value;
   }
@@ -88,16 +86,12 @@ public class SpecialValueTransformer implements RecordTransformer {
         // Multi-valued column.
         Object[] values = (Object[]) value;
         int numValues = values.length;
-        List<Object> negativeZeroNanSanitizedValues = new ArrayList<>(numValues);
-        int numberOfElements = values.length;
-        for (Object o : values) {
-          Object zeroTransformedValue = transformNegativeZero(o);
-          Object nanTransformedValue = transformNaN(zeroTransformedValue);
-          if (nanTransformedValue != null) {
-            negativeZeroNanSanitizedValues.add(nanTransformedValue);
+        for (int i = 0; i < numValues; i++) {
+          if (values[i] != null) {
+            values[i] = transformNegativeZero(values[i]);
+            values[i] = transformNaN(values[i]);
           }
         }
-        record.putValue(element,negativeZeroNanSanitizedValues.toArray());
       } else {
         // Single-valued column.
         Object zeroTransformedValue = transformNegativeZero(value);
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
index 2685f8b6da..6361963785 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/recordtransformer/RecordTransformerTest.java
@@ -19,8 +19,6 @@
 package org.apache.pinot.segment.local.recordtransformer;
 
 import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
@@ -288,12 +286,12 @@ public class RecordTransformerTest {
           Double.doubleToRawLongBits(0.0d));
       assertEquals(record.getValue("mvFloatNegativeZero"), new Float[]{0.0f, 1.0f, 0.0f, 3.0f});
       assertEquals(record.getValue("mvDoubleNegativeZero"), new Double[]{0.0d, 1.0d, 0.0d, 3.0d});
-      assertNull(record.getValue("svFloatNaN"));
-      assertNull(record.getValue("svDoubleNaN"));
+      assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT);
+      assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE);
       assertEquals(record.getValue("mvFloatNaN"),
-          new Float[]{0.0f, 2.0f});
+          new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f});
       assertEquals(record.getValue("mvDoubleNaN"),
-          new Double[]{0.0d, 2.0d});
+          new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d});
     }
   }
 
@@ -304,8 +302,7 @@ public class RecordTransformerTest {
     // Build Schema and ingestionConfig in such a way that all the transformers are loaded.
     Schema schema = new Schema.SchemaBuilder().addSingleValueDimension("svInt", DataType.INT)
         .addSingleValueDimension("svDouble", DataType.DOUBLE)
-        .addSingleValueDimension("expressionTestColumn", DataType.INT)
-        .addSingleValueDimension("svNaN", DataType.FLOAT).addMultiValueDimension("mvNaN",DataType.FLOAT)
+        .addSingleValueDimension("expressionTestColumn", DataType.INT).addSingleValueDimension("svNaN", DataType.FLOAT)
         .addSingleValueDimension("emptyDimensionForNullValueTransformer", DataType.FLOAT)
         .addSingleValueDimension("svStringWithNullCharacters", DataType.STRING)
         .addSingleValueDimension("indexableExtras", DataType.JSON)
@@ -330,8 +327,8 @@ public class RecordTransformerTest {
     List<RecordTransformer> expectedListOfTransformers =
         List.of(new ExpressionTransformer(tableConfig, schema), new FilterTransformer(tableConfig),
             new SchemaConformingTransformer(tableConfig, schema), new DataTypeTransformer(tableConfig, schema),
-            new TimeValidationTransformer(tableConfig, schema), new SpecialValueTransformer(schema),
-            new NullValueTransformer(tableConfig, schema), new SanitizationTransformer(schema));
+            new TimeValidationTransformer(tableConfig, schema), new NullValueTransformer(tableConfig, schema),
+            new SpecialValueTransformer(schema), new SanitizationTransformer(schema));
 
     // Check that the number of current transformers match the expected number of transformers.
     assertEquals(currentListOfTransformers.size(), NUMBER_OF_TRANSFORMERS);
@@ -352,7 +349,6 @@ public class RecordTransformerTest {
 
     // Data for SpecialValue Transformer.
     record.putValue("svNaN", Float.NaN);
-    record.putValue("mvNaN",new Float[]{1.0f,Float.NaN,2.0f});
 
     // Data for sanitization transformer.
     record.putValue("svStringWithNullCharacters", "1\0002\0003");
@@ -649,10 +645,10 @@ public class RecordTransformerTest {
       assertEquals(record.getValue("svFloatNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT);
       assertEquals(record.getValue("svDoubleNaN"), FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE);
       assertEquals(record.getValue("mvFloatNaN"),
-          new Float[]{0.0f, 2.0f});
+          new Float[]{0.0f, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_FLOAT, 2.0f});
       assertEquals(record.getValue("mvDoubleNaN"),
-          new Double[]{0.0d, 2.0d});
-      assertEquals(new ArrayList<>(record.getNullValueFields()), new ArrayList<>(Arrays.asList("svFloatNaN","svDoubleNaN")));
+          new Double[]{0.0d, FieldSpec.DEFAULT_DIMENSION_NULL_VALUE_OF_DOUBLE, 2.0d});
+      assertTrue(record.getNullValueFields().isEmpty());
     }
 
     // Test empty record


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org