You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by ja...@apache.org on 2024/02/13 07:22:41 UTC

(pinot) branch master updated: Fix the double unescape of property value (#12405)

This is an automated email from the ASF dual-hosted git repository.

jackie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 0e09e725f3 Fix the double unescape of property value (#12405)
0e09e725f3 is described below

commit 0e09e725f32a2cfb71baa1de934a4364c05ace7c
Author: Xiaotian (Jackie) Jiang <17...@users.noreply.github.com>
AuthorDate: Mon Feb 12 23:22:35 2024 -0800

    Fix the double unescape of property value (#12405)
---
 .../creator/impl/SegmentColumnarIndexCreator.java  | 21 ++++++++--
 .../local/segment/index/ColumnMetadataTest.java    |  9 +++--
 .../pinot/spi/env/CommonsConfigurationUtils.java   | 46 +++++++++++++---------
 .../spi/env/CommonsConfigurationUtilsTest.java     | 21 +++++++---
 4 files changed, 65 insertions(+), 32 deletions(-)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
index 5f5c589f2f..957d6f98b1 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/creator/impl/SegmentColumnarIndexCreator.java
@@ -618,18 +618,31 @@ public class SegmentColumnarIndexCreator implements SegmentCreator {
       //       null value changes
       defaultNullValue = CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(defaultNullValue);
     }
-    properties.setProperty(getKeyFor(column, DEFAULT_NULL_VALUE), defaultNullValue);
+    if (defaultNullValue != null) {
+      properties.setProperty(getKeyFor(column, DEFAULT_NULL_VALUE), defaultNullValue);
+    }
   }
 
   public static void addColumnMinMaxValueInfo(PropertiesConfiguration properties, String column, String minValue,
       String maxValue, DataType storedType) {
-    properties.setProperty(getKeyFor(column, MIN_VALUE), getValidPropertyValue(minValue, false, storedType));
-    properties.setProperty(getKeyFor(column, MAX_VALUE), getValidPropertyValue(maxValue, true, storedType));
+    String validMinValue = getValidPropertyValue(minValue, false, storedType);
+    if (validMinValue != null) {
+      properties.setProperty(getKeyFor(column, MIN_VALUE), validMinValue);
+    }
+    String validMaxValue = getValidPropertyValue(maxValue, true, storedType);
+    if (validMaxValue != null) {
+      properties.setProperty(getKeyFor(column, MAX_VALUE), validMaxValue);
+    }
+    if (validMinValue == null && validMaxValue == null) {
+      properties.setProperty(getKeyFor(column, MIN_MAX_VALUE_INVALID), true);
+    }
   }
 
   /**
-   * Helper method to get the valid value for setting min/max.
+   * Helper method to get the valid value for setting min/max. Returns {@code null} if the value is not supported in
+   * {@link PropertiesConfiguration}, e.g. contains character with surrogate.
    */
+  @Nullable
   private static String getValidPropertyValue(String value, boolean isMax, DataType storedType) {
     String valueWithinLengthLimit = getValueWithinLengthLimit(value, isMax, storedType);
     return storedType == DataType.STRING ? CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(
diff --git a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java
index e961ad5000..b8c1f58519 100644
--- a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java
+++ b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/ColumnMetadataTest.java
@@ -73,9 +73,9 @@ public class ColumnMetadataTest {
     final String filePath =
         TestUtils.getFileFromResourceUrl(ColumnMetadataTest.class.getClassLoader().getResource(AVRO_DATA));
     // Intentionally changed this to TimeUnit.Hours to make it non-default for testing.
-    SegmentGeneratorConfig config = SegmentTestUtils
-        .getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch", TimeUnit.HOURS,
-            "testTable");
+    SegmentGeneratorConfig config =
+        SegmentTestUtils.getSegmentGenSpecWithSchemAndProjectedColumns(new File(filePath), INDEX_DIR, "daysSinceEpoch",
+            TimeUnit.HOURS, "testTable");
     config.setSegmentNamePostfix("1");
     return config;
   }
@@ -223,6 +223,7 @@ public class ColumnMetadataTest {
     PropertiesConfiguration propertiesConfiguration = CommonsConfigurationUtils.fromFile(metadataFile);
     ColumnMetadataImpl installationOutput =
         ColumnMetadataImpl.fromPropertiesConfiguration("installation_output", propertiesConfiguration);
-    Assert.assertNotNull(installationOutput);
+    Assert.assertEquals(installationOutput.getMinValue(),
+        "\r\n\r\n  utils   em::C:\\dir\\utils\r\nPSParentPath            : Mi");
   }
 }
diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java b/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java
index 759fa0e3bd..1b09d25014 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/env/CommonsConfigurationUtils.java
@@ -29,12 +29,12 @@ import java.util.Optional;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
+import javax.annotation.Nullable;
 import org.apache.commons.configuration2.Configuration;
 import org.apache.commons.configuration2.PropertiesConfiguration;
 import org.apache.commons.configuration2.convert.LegacyListDelimiterHandler;
 import org.apache.commons.configuration2.ex.ConfigurationException;
 import org.apache.commons.configuration2.io.FileHandler;
-import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.commons.lang3.StringUtils;
 
 
@@ -43,6 +43,7 @@ import org.apache.commons.lang3.StringUtils;
  */
 public class CommonsConfigurationUtils {
   private static final Character DEFAULT_LIST_DELIMITER = ',';
+
   private CommonsConfigurationUtils() {
   }
 
@@ -99,7 +100,8 @@ public class CommonsConfigurationUtils {
    * @return a {@link PropertiesConfiguration} instance.
    */
   public static PropertiesConfiguration fromInputStream(InputStream stream, boolean setIOFactory,
-      boolean setDefaultDelimiter) throws ConfigurationException {
+      boolean setDefaultDelimiter)
+      throws ConfigurationException {
     PropertiesConfiguration config = createPropertiesConfiguration(setIOFactory, setDefaultDelimiter);
     FileHandler fileHandler = new FileHandler(config);
     fileHandler.load(stream);
@@ -113,8 +115,8 @@ public class CommonsConfigurationUtils {
    * @param setDefaultDelimiter representing to set the default list delimiter.
    * @return a {@link PropertiesConfiguration} instance.
    */
-  public static PropertiesConfiguration fromFile(File file, boolean setIOFactory,
-      boolean setDefaultDelimiter) throws ConfigurationException {
+  public static PropertiesConfiguration fromFile(File file, boolean setIOFactory, boolean setDefaultDelimiter)
+      throws ConfigurationException {
     PropertiesConfiguration config = createPropertiesConfiguration(setIOFactory, setDefaultDelimiter);
     FileHandler fileHandler = new FileHandler(config);
     // check if file exists, load the properties otherwise set the file.
@@ -216,24 +218,40 @@ public class CommonsConfigurationUtils {
   /**
    * Replaces the special character in the given property value.
    * - Leading/trailing space is prefixed/suffixed with "\0"
-   * - Comma is replaces with "\0\0"
+   * - Comma is replaced with "\0\0"
+   * Returns {@code null} when the given value contains surrogate characters because it is not supported by
+   * {@link PropertiesConfiguration}.
    *
    * Note:
    * - '\0' is not allowed in string values, so we can use it as the replaced character
    * - Escaping comma with backslash doesn't work when comma is preceded by a backslash
    */
+  @Nullable
   public static String replaceSpecialCharacterInPropertyValue(String value) {
-    value = StringEscapeUtils.escapeJava(value);
-    if (value.isEmpty()) {
+    int length = value.length();
+    if (length == 0) {
       return value;
     }
+    boolean containsDelimiter = false;
+    for (int i = 0; i < length; i++) {
+      char c = value.charAt(i);
+      if (Character.isSurrogate(c)) {
+        return null;
+      }
+      if (c == DEFAULT_LIST_DELIMITER) {
+        containsDelimiter = true;
+      }
+    }
+    if (containsDelimiter) {
+      value = value.replace(",", "\0\0");
+    }
     if (value.charAt(0) == ' ') {
       value = "\0" + value;
     }
     if (value.charAt(value.length() - 1) == ' ') {
       value = value + "\0";
     }
-    return value.replace(",", "\0\0");
+    return value;
   }
 
   /**
@@ -241,12 +259,6 @@ public class CommonsConfigurationUtils {
    * {@link #replaceSpecialCharacterInPropertyValue(String)}.
    */
   public static String recoverSpecialCharacterInPropertyValue(String value) {
-    try {
-      // This is for backward compatibility, to handle the old commons library escape character behavior.
-      value = StringEscapeUtils.unescapeJava(value);
-    } catch (Exception e) {
-      // If the value is not a valid escaped string, ignore the exception and continue
-    }
     if (value.isEmpty()) {
       return value;
     }
@@ -270,13 +282,9 @@ public class CommonsConfigurationUtils {
 
     // setting DEFAULT_LIST_DELIMITER
     if (setDefaultDelimiter) {
-      setListDelimiterHandler(config);
+      config.setListDelimiterHandler(new LegacyListDelimiterHandler(DEFAULT_LIST_DELIMITER));
     }
 
     return config;
   }
-
-  private static void setListDelimiterHandler(PropertiesConfiguration configuration) {
-    configuration.setListDelimiterHandler(new LegacyListDelimiterHandler(DEFAULT_LIST_DELIMITER));
-  }
 }
diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/env/CommonsConfigurationUtilsTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/env/CommonsConfigurationUtilsTest.java
index 9d31a7926f..af40022794 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/env/CommonsConfigurationUtilsTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/env/CommonsConfigurationUtilsTest.java
@@ -21,7 +21,6 @@ package org.apache.pinot.spi.env;
 import java.io.File;
 import java.io.IOException;
 import org.apache.commons.configuration2.PropertiesConfiguration;
-import org.apache.commons.configuration2.ex.ConfigurationException;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -30,13 +29,13 @@ import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertTrue;
 
 
 public class CommonsConfigurationUtilsTest {
   private static final File TEMP_DIR = new File(FileUtils.getTempDirectory(), "CommonsConfigurationUtilsTest");
   private static final File CONFIG_FILE = new File(TEMP_DIR, "config");
   private static final String PROPERTY_KEY = "testKey";
-  private static final String LIST_PROPERTY_KEY = "listTestKey";
   private static final int NUM_ROUNDS = 10000;
 
   @BeforeClass
@@ -53,7 +52,7 @@ public class CommonsConfigurationUtilsTest {
 
   @Test
   public void testPropertyValueWithSpecialCharacters()
-      throws ConfigurationException {
+      throws Exception {
     // Leading/trailing whitespace
     testPropertyValueWithSpecialCharacters(" a");
     testPropertyValueWithSpecialCharacters("a ");
@@ -93,17 +92,29 @@ public class CommonsConfigurationUtilsTest {
   }
 
   private void testPropertyValueWithSpecialCharacters(String value)
-      throws ConfigurationException {
+      throws Exception {
     String replacedValue = CommonsConfigurationUtils.replaceSpecialCharacterInPropertyValue(value);
+    if (replacedValue == null) {
+      boolean hasSurrogate = false;
+      int length = value.length();
+      for (int i = 0; i < length; i++) {
+        if (Character.isSurrogate(value.charAt(i))) {
+          hasSurrogate = true;
+          break;
+        }
+      }
+      assertTrue(hasSurrogate);
+      return;
+    }
 
     PropertiesConfiguration configuration = CommonsConfigurationUtils.fromFile(CONFIG_FILE, false, true);
     configuration.setProperty(PROPERTY_KEY, replacedValue);
     String recoveredValue = CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(
         (String) configuration.getProperty(PROPERTY_KEY));
     assertEquals(recoveredValue, value);
+
     CommonsConfigurationUtils.saveToFile(configuration, CONFIG_FILE);
     configuration = CommonsConfigurationUtils.fromFile(CONFIG_FILE, false, true);
-
     recoveredValue = CommonsConfigurationUtils.recoverSpecialCharacterInPropertyValue(
         (String) configuration.getProperty(PROPERTY_KEY));
     assertEquals(recoveredValue, value);


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org