You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2021/03/18 00:54:58 UTC

[GitHub] [incubator-pinot] yupeng9 commented on a change in pull request #6567: Enable segment upload to a realtime table

yupeng9 commented on a change in pull request #6567:
URL: https://github.com/apache/incubator-pinot/pull/6567#discussion_r596473726



##########
File path: pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java
##########
@@ -118,6 +125,47 @@ public static void fetchSegmentToLocal(String uri, File dest)
     fetchSegmentToLocal(new URI(uri), dest);
   }
 
+  /**
+   * Fetches a segment from a given URI and untar the segment file to the dest dir (i.e., tableDataDir + segmentName).
+   */
+  public static void fetchAndUntarSegmentToLocal(String uri, File tableDataDir, String segmentName)

Review comment:
       @npawar fyi, shall this be part of the `SegmentUploader` implementation?

##########
File path: pinot-controller/src/main/java/org/apache/pinot/controller/helix/core/assignment/segment/RealtimeSegmentAssignment.java
##########
@@ -172,6 +183,39 @@ private void checkReplication(InstancePartitions instancePartitions) {
     }
   }
 
+  private int getSegmentPartitionId(String segmentName) {
+    // A fast path if the segmentName is a LLC segment name and we can get the partition id from the name directly.
+    if (LLCSegmentName.isLowLevelConsumerSegmentName(segmentName)) {
+      return new LLCSegmentName(segmentName).getPartitionGroupId();
+    }
+    // Otherwise, retrieve the partition id from the segment zk metadata. Currently only realtime segments from upsert
+    // enabled tables have partition ids in their segment metadata.
+    RealtimeSegmentZKMetadata segmentZKMetadata = ZKMetadataProvider
+        .getRealtimeSegmentZKMetadata(_helixManager.getHelixPropertyStore(), _realtimeTableName, segmentName);
+    Preconditions
+        .checkState(segmentZKMetadata != null, "Failed to find segment ZK metadata for segment: %s of table: %s",
+            segmentName, _realtimeTableName);
+    return getSegmentPartitionIdFromZkMetaData(segmentZKMetadata);
+  }
+
+  private int getSegmentPartitionIdFromZkMetaData(RealtimeSegmentZKMetadata segmentZKMetadata) {
+    String segmentName = segmentZKMetadata.getSegmentName();
+    Preconditions.checkState(segmentZKMetadata.getPartitionMetadata() != null,
+        "Segment ZK metadata for segment: %s of table: %s does not contain partition metadata", segmentName,
+        _realtimeTableName);
+
+    ColumnPartitionMetadata partitionMetadata =
+        segmentZKMetadata.getPartitionMetadata().getColumnPartitionMap().get(_partitionColumn);
+    Preconditions.checkState(partitionMetadata != null,
+        "Segment ZK metadata for segment: %s of table: %s does not contain partition metadata for column: %s",

Review comment:
       shall we make the message more informative, by adding `checking if the table is in upsert mode`?

##########
File path: pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java
##########
@@ -118,6 +125,47 @@ public static void fetchSegmentToLocal(String uri, File dest)
     fetchSegmentToLocal(new URI(uri), dest);
   }
 
+  /**
+   * Fetches a segment from a given URI and untar the segment file to the dest dir (i.e., tableDataDir + segmentName).
+   */
+  public static void fetchAndUntarSegmentToLocal(String uri, File tableDataDir, String segmentName)
+      throws Exception {
+    File tempDir = new File(tableDataDir, "tmp-" + segmentName + "-" + UUID.randomUUID());
+    FileUtils.forceMkdir(tempDir);
+    File tempTarFile = new File(tempDir, segmentName + TAR_GZ_SUFFIX);
+    File tempSegmentDir = new File(tempDir, segmentName);
+    try {
+      try {
+        SegmentFetcherFactory.fetchSegmentToLocal(uri, tempTarFile);
+        LOGGER.info("Downloaded tarred segment: {} from: {} to: {}, file length: {}", segmentName, uri, tempTarFile,
+            tempTarFile.length());
+      } catch (AttemptsExceededException e) {
+        LOGGER.error("Attempts exceeded when downloading segment: {} from: {} to: {}", segmentName, uri,
+            tempTarFile, e);
+        Utils.rethrowException(e);
+      }
+
+      try {
+        // If an exception is thrown when untarring, it means the tar file is broken OR not found after the retry.
+        // Thus, there's no need to retry again.
+        File tempIndexDir = TarGzCompressionUtils.untar(tempTarFile, tempSegmentDir).get(0);
+        File segmentDir = new File(tableDataDir, segmentName);
+        if (segmentDir.exists()) {
+          LOGGER.info("Deleting existing index directory for segment: {}", segmentName);
+          FileUtils.deleteDirectory(segmentDir);
+        }
+        FileUtils.moveDirectory(tempIndexDir, segmentDir);
+        LOGGER.info("Successfully downloaded segment: {} to: {}", segmentName, segmentDir);
+      } catch (Exception e) {
+        LOGGER.error("Exception when untarring segment: {} for from {} to {}", segmentName, tempTarFile, tempSegmentDir,

Review comment:
       no need for `for` in the msg?

##########
File path: pinot-core/src/main/java/org/apache/pinot/core/data/manager/realtime/RealtimeTableDataManager.java
##########
@@ -543,4 +556,41 @@ private boolean isValid(Schema schema, IndexingConfig indexingConfig) {
     }
     return isValid;
   }
+
+  private int getSegmentPartitionId(String segmentName, String tableName) {

Review comment:
       this logic is repeated in `RealtimeSegmentAssignment.java`?

##########
File path: pinot-common/src/main/java/org/apache/pinot/common/utils/CommonConstants.java
##########
@@ -358,7 +358,7 @@
   public static class Segment {
     public static class Realtime {
       public enum Status {
-        IN_PROGRESS, DONE
+        IN_PROGRESS, DONE, UPLOAD

Review comment:
       +1 to @mcvsubbu's suggestion. @chenboat is this status/flow documented somewhere in the design doc? 

##########
File path: pinot-common/src/main/java/org/apache/pinot/common/utils/fetcher/SegmentFetcherFactory.java
##########
@@ -118,6 +125,47 @@ public static void fetchSegmentToLocal(String uri, File dest)
     fetchSegmentToLocal(new URI(uri), dest);
   }
 
+  /**
+   * Fetches a segment from a given URI and untar the segment file to the dest dir (i.e., tableDataDir + segmentName).
+   */
+  public static void fetchAndUntarSegmentToLocal(String uri, File tableDataDir, String segmentName)
+      throws Exception {
+    File tempDir = new File(tableDataDir, "tmp-" + segmentName + "-" + UUID.randomUUID());
+    FileUtils.forceMkdir(tempDir);
+    File tempTarFile = new File(tempDir, segmentName + TAR_GZ_SUFFIX);
+    File tempSegmentDir = new File(tempDir, segmentName);
+    try {
+      try {
+        SegmentFetcherFactory.fetchSegmentToLocal(uri, tempTarFile);
+        LOGGER.info("Downloaded tarred segment: {} from: {} to: {}, file length: {}", segmentName, uri, tempTarFile,
+            tempTarFile.length());
+      } catch (AttemptsExceededException e) {
+        LOGGER.error("Attempts exceeded when downloading segment: {} from: {} to: {}", segmentName, uri,
+            tempTarFile, e);
+        Utils.rethrowException(e);
+      }
+
+      try {
+        // If an exception is thrown when untarring, it means the tar file is broken OR not found after the retry.
+        // Thus, there's no need to retry again.
+        File tempIndexDir = TarGzCompressionUtils.untar(tempTarFile, tempSegmentDir).get(0);
+        File segmentDir = new File(tableDataDir, segmentName);
+        if (segmentDir.exists()) {
+          LOGGER.info("Deleting existing index directory for segment: {}", segmentName);
+          FileUtils.deleteDirectory(segmentDir);
+        }
+        FileUtils.moveDirectory(tempIndexDir, segmentDir);
+        LOGGER.info("Successfully downloaded segment: {} to: {}", segmentName, segmentDir);
+      } catch (Exception e) {
+        LOGGER.error("Exception when untarring segment: {} for from {} to {}", segmentName, tempTarFile, tempSegmentDir,
+            e);
+        Utils.rethrowException(e);
+      }
+    } finally {
+      FileUtils.deleteQuietly(tempDir);

Review comment:
       the tmp files might not be cleaned up in the process crashed before hitting `finally`.  Do we have some mechanism to recycle zombie files?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org