You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by kh...@apache.org on 2022/09/10 10:29:52 UTC

[pinot] branch master updated: Improve segment name check in metadata push (#9359)

This is an automated email from the ASF dual-hosted git repository.

kharekartik pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new ff98c5ad1b Improve segment name check in metadata push (#9359)
ff98c5ad1b is described below

commit ff98c5ad1b4a50032f53c2b9e6b634f26f0b4785
Author: Haitao Zhang <ha...@startree.ai>
AuthorDate: Sat Sep 10 03:29:45 2022 -0700

    Improve segment name check in metadata push (#9359)
    
    * Improve segment name check in metadata push
    
    * address comments
    
    * fix broken tests
---
 .../segment/local/utils/SegmentPushUtils.java      |  7 ++++--
 .../creator/name/FixedSegmentNameGenerator.java    |  4 +--
 .../name/NormalizedDateSegmentNameGenerator.java   |  8 +++---
 .../spi/creator/name/SegmentNameGenerator.java     | 12 ---------
 .../spi/creator/name/SegmentNameUtils.java}        | 29 ++++++++++++----------
 .../creator/name/SimpleSegmentNameGenerator.java   | 18 ++++++++------
 .../name/FixedSegmentNameGeneratorTest.java        |  2 +-
 .../name/SimpleSegmentNameGeneratorTest.java       |  2 +-
 8 files changed, 38 insertions(+), 44 deletions(-)

diff --git a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
index b756c7f760..cee66b2b9a 100644
--- a/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
+++ b/pinot-segment-local/src/main/java/org/apache/pinot/segment/local/utils/SegmentPushUtils.java
@@ -44,6 +44,7 @@ import org.apache.pinot.common.utils.SimpleHttpResponse;
 import org.apache.pinot.common.utils.TarGzCompressionUtils;
 import org.apache.pinot.common.utils.http.HttpClient;
 import org.apache.pinot.segment.spi.V1Constants;
+import org.apache.pinot.segment.spi.creator.name.SegmentNameUtils;
 import org.apache.pinot.spi.auth.AuthProvider;
 import org.apache.pinot.spi.config.table.TableType;
 import org.apache.pinot.spi.filesystem.PinotFS;
@@ -237,8 +238,10 @@ public class SegmentPushUtils implements Serializable {
     for (String segmentUriPath : segmentUriToTarPathMap.keySet()) {
       String tarFilePath = segmentUriToTarPathMap.get(segmentUriPath);
       String fileName = new File(tarFilePath).getName();
-      Preconditions.checkArgument(fileName.endsWith(Constants.TAR_GZ_FILE_EXT));
-      String segmentName = fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length());
+      // segments stored in Pinot deep store do not have .tar.gz extension
+      String segmentName = fileName.endsWith(Constants.TAR_GZ_FILE_EXT)
+          ? fileName.substring(0, fileName.length() - Constants.TAR_GZ_FILE_EXT.length()) : fileName;
+      SegmentNameUtils.validatePartialOrFullSegmentName(segmentName);
       File segmentMetadataFile = generateSegmentMetadataFile(fileSystem, URI.create(tarFilePath));
       AuthProvider authProvider = AuthProviderUtils.makeAuthProvider(spec.getAuthToken());
       try {
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java
index cc4acb0c0b..cf9eaed03a 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGenerator.java
@@ -31,9 +31,7 @@ public class FixedSegmentNameGenerator implements SegmentNameGenerator {
 
   public FixedSegmentNameGenerator(String segmentName) {
     Preconditions.checkArgument(segmentName != null, "Missing segmentName for FixedSegmentNameGenerator");
-    Preconditions
-        .checkArgument(isValidSegmentName(segmentName), "Invalid segmentName: %s for FixedSegmentNameGenerator",
-            segmentName);
+    SegmentNameUtils.validatePartialOrFullSegmentName(segmentName);
     _segmentName = segmentName;
   }
 
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java
index 4382413314..f313d65829 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/NormalizedDateSegmentNameGenerator.java
@@ -66,14 +66,14 @@ public class NormalizedDateSegmentNameGenerator implements SegmentNameGenerator
     _segmentNamePrefix = segmentNamePrefix != null ? segmentNamePrefix.trim() : tableName;
     Preconditions
         .checkArgument(_segmentNamePrefix != null, "Missing segmentNamePrefix for NormalizedDateSegmentNameGenerator");
-    Preconditions.checkArgument(isValidSegmentName(_segmentNamePrefix),
-        "Invalid segmentNamePrefix: %s for NormalizedDateSegmentNameGenerator", _segmentNamePrefix);
+    SegmentNameUtils.validatePartialOrFullSegmentName(_segmentNamePrefix);
     _excludeSequenceId = excludeSequenceId;
     _appendPushType = "APPEND".equalsIgnoreCase(pushType);
     _segmentNamePostfix = segmentNamePostfix != null ? segmentNamePostfix.trim() : null;
     _appendUUIDToSegmentName = appendUUIDToSegmentName;
-    Preconditions.checkArgument(_segmentNamePostfix == null || isValidSegmentName(_segmentNamePostfix),
-        "Invalid segmentNamePostfix: %s for NormalizedDateSegmentNameGenerator", _segmentNamePostfix);
+    if (_segmentNamePostfix != null) {
+      SegmentNameUtils.validatePartialOrFullSegmentName(_segmentNamePostfix);
+    }
 
     // Include time info for APPEND push type
     if (_appendPushType) {
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGenerator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGenerator.java
index 5ff0afa305..ddbb208b03 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGenerator.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameGenerator.java
@@ -20,7 +20,6 @@ package org.apache.pinot.segment.spi.creator.name;
 
 import com.google.common.base.Joiner;
 import java.io.Serializable;
-import java.util.regex.Pattern;
 import javax.annotation.Nullable;
 
 
@@ -28,19 +27,8 @@ import javax.annotation.Nullable;
  * Interface for segment name generator based on the segment sequence id and time range.
  */
 public interface SegmentNameGenerator extends Serializable {
-  Pattern INVALID_SEGMENT_NAME_REGEX = Pattern.compile(".*[\\\\/:\\*?\"<>|].*");
   Joiner JOINER = Joiner.on('_').skipNulls();
 
-  /**
-   * A handy util to validate if segment name is valid.
-   *
-   * @param segmentName provide segment name
-   * @return true if segmentName is valid.
-   */
-  default boolean isValidSegmentName(String segmentName) {
-    return !INVALID_SEGMENT_NAME_REGEX.matcher(segmentName).matches();
-  }
-
   /**
    * Generates the segment name.
    *
diff --git a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGeneratorTest.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameUtils.java
similarity index 56%
copy from pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGeneratorTest.java
copy to pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameUtils.java
index 2205e379e5..f3000c994a 100644
--- a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGeneratorTest.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SegmentNameUtils.java
@@ -18,23 +18,26 @@
  */
 package org.apache.pinot.segment.spi.creator.name;
 
-import org.testng.Assert;
-import org.testng.annotations.Test;
+import java.util.regex.Pattern;
 
-import static org.testng.Assert.assertEquals;
 
+/**
+ * Utils for segment names.
+ */
+public class SegmentNameUtils {
+  private static final Pattern INVALID_SEGMENT_NAME_REGEX = Pattern.compile(".*[\\\\/:\\*?\"<>|].*");
 
-public class FixedSegmentNameGeneratorTest {
+  private SegmentNameUtils() {
+  }
 
-  @Test
-  public void testWithMalFormedSegmentName() {
-    assertEquals(new FixedSegmentNameGenerator("seg01").generateSegmentName(0, null, null), "seg01");
-    try {
-      new FixedSegmentNameGenerator("seg*01").generateSegmentName(0, null, null);
-      Assert.fail();
-    } catch (IllegalArgumentException e) {
-      // Expected
-      assertEquals(e.getMessage(), "Invalid segmentName: seg*01 for FixedSegmentNameGenerator");
+  /**
+   * A handy util to validate if segment name is valid.
+   *
+   * @param partialOrFullSegmentName provide partial or full segment name
+   */
+  public static void validatePartialOrFullSegmentName(String partialOrFullSegmentName) {
+    if (INVALID_SEGMENT_NAME_REGEX.matcher(partialOrFullSegmentName).matches()) {
+      throw new IllegalArgumentException("Invalid partial or full segment name: " + partialOrFullSegmentName);
     }
   }
 }
diff --git a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGenerator.java b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGenerator.java
index 408c7231d2..1fc7eb92a7 100644
--- a/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGenerator.java
+++ b/pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGenerator.java
@@ -48,10 +48,10 @@ public class SimpleSegmentNameGenerator implements SegmentNameGenerator {
   public SimpleSegmentNameGenerator(String segmentNamePrefix, @Nullable String segmentNamePostfix,
       boolean appendUUIDToSegmentName) {
     Preconditions.checkArgument(segmentNamePrefix != null, "Missing segmentNamePrefix for SimpleSegmentNameGenerator");
-    Preconditions.checkArgument(isValidSegmentName(segmentNamePrefix),
-        "Invalid segmentNamePrefix: %s for SimpleSegmentNameGenerator", segmentNamePrefix);
-    Preconditions.checkArgument(segmentNamePostfix == null || isValidSegmentName(segmentNamePostfix),
-        "Invalid segmentNamePostfix: %s for SimpleSegmentNameGenerator", segmentNamePostfix);
+    SegmentNameUtils.validatePartialOrFullSegmentName(segmentNamePrefix);
+    if (segmentNamePostfix != null) {
+      SegmentNameUtils.validatePartialOrFullSegmentName(segmentNamePostfix);
+    }
     _segmentNamePrefix = segmentNamePrefix;
     _segmentNamePostfix = segmentNamePostfix;
     _appendUUIDToSegmentName = appendUUIDToSegmentName;
@@ -59,10 +59,12 @@ public class SimpleSegmentNameGenerator implements SegmentNameGenerator {
 
   @Override
   public String generateSegmentName(int sequenceId, @Nullable Object minTimeValue, @Nullable Object maxTimeValue) {
-    Preconditions.checkArgument(minTimeValue == null || isValidSegmentName(minTimeValue.toString()),
-        "Invalid minTimeValue: %s for SimpleSegmentNameGenerator", minTimeValue);
-    Preconditions.checkArgument(maxTimeValue == null || isValidSegmentName(maxTimeValue.toString()),
-        "Invalid maxTimeValue: %s for SimpleSegmentNameGenerator", maxTimeValue);
+    if (minTimeValue != null) {
+      SegmentNameUtils.validatePartialOrFullSegmentName(minTimeValue.toString());
+    }
+    if (maxTimeValue != null) {
+      SegmentNameUtils.validatePartialOrFullSegmentName(maxTimeValue.toString());
+    }
 
     return JOINER.join(_segmentNamePrefix, minTimeValue, maxTimeValue, _segmentNamePostfix,
         sequenceId >= 0 ? sequenceId : null, _appendUUIDToSegmentName ? UUID.randomUUID() : null);
diff --git a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGeneratorTest.java b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGeneratorTest.java
index 2205e379e5..de2991b624 100644
--- a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGeneratorTest.java
+++ b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/FixedSegmentNameGeneratorTest.java
@@ -34,7 +34,7 @@ public class FixedSegmentNameGeneratorTest {
       Assert.fail();
     } catch (IllegalArgumentException e) {
       // Expected
-      assertEquals(e.getMessage(), "Invalid segmentName: seg*01 for FixedSegmentNameGenerator");
+      assertEquals(e.getMessage(), "Invalid partial or full segment name: seg*01");
     }
   }
 }
diff --git a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGeneratorTest.java b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGeneratorTest.java
index 5bf336a91a..839eb1e845 100644
--- a/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGeneratorTest.java
+++ b/pinot-segment-spi/src/test/java/org/apache/pinot/segment/spi/creator/name/SimpleSegmentNameGeneratorTest.java
@@ -80,7 +80,7 @@ public class SimpleSegmentNameGeneratorTest {
       Assert.fail();
     } catch (IllegalArgumentException e) {
       // Expected
-      assertEquals(e.getMessage(), "Invalid maxTimeValue: 12|34 for SimpleSegmentNameGenerator");
+      assertEquals(e.getMessage(), "Invalid partial or full segment name: 12|34");
     }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org