You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@carbondata.apache.org by GitBox <gi...@apache.org> on 2020/12/03 01:59:56 UTC

[GitHub] [carbondata] QiangCai commented on a change in pull request #4035: [CARBONDATA-4067]: CleanFiles Behaviour Change

QiangCai commented on a change in pull request #4035:
URL: https://github.com/apache/carbondata/pull/4035#discussion_r534596239



##########
File path: core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1095,7 +1099,8 @@ public static void deleteLoadsAndUpdateMetadata(CarbonTable carbonTable, boolean
             // if execute command 'clean files' or the number of invisible segment info
             // exceeds the value of 'carbon.invisible.segments.preserve.count',
             // it need to append the invisible segment list to 'tablestatus.history' file.
-            if (isForceDeletion || (invisibleSegmentCnt > invisibleSegmentPreserveCnt)) {
+            if (cleanStaleInprogress || cleanCompactedAndMFD || (invisibleSegmentCnt >

Review comment:
       There is no need to always move the load metadata to history.

##########
File path: core/src/main/java/org/apache/carbondata/core/statusmanager/SegmentStatusManager.java
##########
@@ -1039,17 +1039,19 @@ private static void writeLoadMetadata(AbsoluteTableIdentifier identifier,
     }
   }
 
-  private static ReturnTuple isUpdateRequired(boolean isForceDeletion, CarbonTable carbonTable,
-      AbsoluteTableIdentifier absoluteTableIdentifier, LoadMetadataDetails[] details) {
+  private static ReturnTuple isUpdateRequired(boolean cleanStaleInProgress, boolean

Review comment:
       Why remove isForceDeletion?

##########
File path: core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java
##########
@@ -173,40 +176,62 @@ public boolean accept(CarbonFile file) {
   }
 
   private static boolean checkIfLoadCanBeDeleted(LoadMetadataDetails oneLoad,
-      boolean isForceDelete) {
-    if ((SegmentStatus.MARKED_FOR_DELETE == oneLoad.getSegmentStatus() ||
-        SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() ||
-        SegmentStatus.INSERT_IN_PROGRESS == oneLoad.getSegmentStatus() ||
-        SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == oneLoad.getSegmentStatus())
-        && oneLoad.getVisibility().equalsIgnoreCase("true")) {
-      if (isForceDelete) {
-        return true;
-      }
-      long deletionTime = oneLoad.getModificationOrDeletionTimestamp();
-      return TrashUtil.isTrashRetentionTimeoutExceeded(deletionTime) && CarbonUpdateUtil
-          .isMaxQueryTimeoutExceeded(deletionTime);
+      boolean cleanStaleInProgress, boolean cleanCompactedAndMFD) {
+    if (oneLoad.getVisibility().equalsIgnoreCase("true")) {
+      return checkLoadDeletionLogic(oneLoad, cleanCompactedAndMFD, cleanStaleInProgress);
     }
-
     return false;
   }
 
   private static boolean checkIfLoadCanBeDeletedPhysically(LoadMetadataDetails oneLoad,
-      boolean isForceDelete) {
+      boolean cleanCompactedAndMFD, boolean cleanStaleInProgress) {
     // Check if the segment is added externally and path is set then do not delete it
-    if ((SegmentStatus.MARKED_FOR_DELETE == oneLoad.getSegmentStatus()
-        || SegmentStatus.COMPACTED == oneLoad.getSegmentStatus()) && (oneLoad.getPath() == null
-        || oneLoad.getPath().equalsIgnoreCase("NA"))) {
-      if (isForceDelete) {
-        return true;
-      }
-      long deletionTime = oneLoad.getModificationOrDeletionTimestamp();
+    if (oneLoad.getPath() == null || oneLoad.getPath().equalsIgnoreCase("NA")) {
+      return checkLoadDeletionLogic(oneLoad, cleanCompactedAndMFD, cleanStaleInProgress);
+    }
+    return false;
+  }
 
-      return TrashUtil.isTrashRetentionTimeoutExceeded(deletionTime) && CarbonUpdateUtil
+  private static Boolean checkLoadDeletionLogic(LoadMetadataDetails oneLoad,
+      boolean cleanCompactedAndMFD, boolean cleanStaleInProgress) {
+    /*
+     * if cleanStaleInProgress == false and  cleanCompactedAndMFD == false, clean MFD and Compacted
+     *  segments after trashtimeout(7days) && query timeout(1 hr)
+     * if cleanStaleInProgress == false and  cleanCompactedAndMFD == true, clean MFD and Compacted
+     *  segments immediately
+     * if cleanStaleInProgress == true and  cleanCompactedAndMFD == false, clean Stale Inprogress
+     *  segments after 7 days(taking carbon.trash.retention.time value)
+     * if cleanStaleInProgress == true and  cleanCompactedAndMFD == true, clean MFD, Compacted and
+     *  stale inprogress segments immediately.
+     */
+    if (!cleanCompactedAndMFD && !cleanStaleInProgress) {
+      if (SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() || SegmentStatus
+          .MARKED_FOR_DELETE == oneLoad.getSegmentStatus()) {
+        long deletionTime = oneLoad.getModificationOrDeletionTimestamp();
+        return TrashUtil.isTrashRetentionTimeoutExceeded(deletionTime) && CarbonUpdateUtil
           .isMaxQueryTimeoutExceeded(deletionTime);
-
+      }
+      return false;
+    } else if (cleanCompactedAndMFD && !cleanStaleInProgress) {
+      return SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() || SegmentStatus
+        .MARKED_FOR_DELETE == oneLoad.getSegmentStatus();
+    } else if (!cleanCompactedAndMFD) {

Review comment:
       It is hard to understand each if condition.
   Please simplify the whole if/else chain.

##########
File path: core/src/main/java/org/apache/carbondata/core/util/DeleteLoadFolders.java
##########
@@ -173,40 +176,62 @@ public boolean accept(CarbonFile file) {
   }
 
   private static boolean checkIfLoadCanBeDeleted(LoadMetadataDetails oneLoad,
-      boolean isForceDelete) {
-    if ((SegmentStatus.MARKED_FOR_DELETE == oneLoad.getSegmentStatus() ||
-        SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() ||
-        SegmentStatus.INSERT_IN_PROGRESS == oneLoad.getSegmentStatus() ||
-        SegmentStatus.INSERT_OVERWRITE_IN_PROGRESS == oneLoad.getSegmentStatus())
-        && oneLoad.getVisibility().equalsIgnoreCase("true")) {
-      if (isForceDelete) {
-        return true;
-      }
-      long deletionTime = oneLoad.getModificationOrDeletionTimestamp();
-      return TrashUtil.isTrashRetentionTimeoutExceeded(deletionTime) && CarbonUpdateUtil
-          .isMaxQueryTimeoutExceeded(deletionTime);
+      boolean cleanStaleInProgress, boolean cleanCompactedAndMFD) {
+    if (oneLoad.getVisibility().equalsIgnoreCase("true")) {
+      return checkLoadDeletionLogic(oneLoad, cleanCompactedAndMFD, cleanStaleInProgress);
     }
-
     return false;
   }
 
   private static boolean checkIfLoadCanBeDeletedPhysically(LoadMetadataDetails oneLoad,
-      boolean isForceDelete) {
+      boolean cleanCompactedAndMFD, boolean cleanStaleInProgress) {
     // Check if the segment is added externally and path is set then do not delete it
-    if ((SegmentStatus.MARKED_FOR_DELETE == oneLoad.getSegmentStatus()
-        || SegmentStatus.COMPACTED == oneLoad.getSegmentStatus()) && (oneLoad.getPath() == null
-        || oneLoad.getPath().equalsIgnoreCase("NA"))) {
-      if (isForceDelete) {
-        return true;
-      }
-      long deletionTime = oneLoad.getModificationOrDeletionTimestamp();
+    if (oneLoad.getPath() == null || oneLoad.getPath().equalsIgnoreCase("NA")) {
+      return checkLoadDeletionLogic(oneLoad, cleanCompactedAndMFD, cleanStaleInProgress);
+    }
+    return false;
+  }
 
-      return TrashUtil.isTrashRetentionTimeoutExceeded(deletionTime) && CarbonUpdateUtil
+  private static Boolean checkLoadDeletionLogic(LoadMetadataDetails oneLoad,
+      boolean cleanCompactedAndMFD, boolean cleanStaleInProgress) {
+    /*
+     * if cleanStaleInProgress == false and  cleanCompactedAndMFD == false, clean MFD and Compacted
+     *  segments after trashtimeout(7days) && query timeout(1 hr)
+     * if cleanStaleInProgress == false and  cleanCompactedAndMFD == true, clean MFD and Compacted
+     *  segments immediately
+     * if cleanStaleInProgress == true and  cleanCompactedAndMFD == false, clean Stale Inprogress
+     *  segments after 7 days(taking carbon.trash.retention.time value)
+     * if cleanStaleInProgress == true and  cleanCompactedAndMFD == true, clean MFD, Compacted and
+     *  stale inprogress segments immediately.
+     */
+    if (!cleanCompactedAndMFD && !cleanStaleInProgress) {
+      if (SegmentStatus.COMPACTED == oneLoad.getSegmentStatus() || SegmentStatus
+          .MARKED_FOR_DELETE == oneLoad.getSegmentStatus()) {
+        long deletionTime = oneLoad.getModificationOrDeletionTimestamp();
+        return TrashUtil.isTrashRetentionTimeoutExceeded(deletionTime) && CarbonUpdateUtil
           .isMaxQueryTimeoutExceeded(deletionTime);
-
+      }
+      return false;
+    } else if (cleanCompactedAndMFD && !cleanStaleInProgress) {

Review comment:
       Does cleanCompactedAndMFD mean forceDelete?

##########
File path: core/src/main/java/org/apache/carbondata/core/util/CleanFilesUtil.java
##########
@@ -147,17 +147,28 @@ public static void cleanStaleSegmentsForPartitionTable(CarbonTable carbonTable)
    * stale segment. Only comparing from tablestatus file, not checking tablestatus.history file
    */
   private static void getStaleSegmentFiles(CarbonTable carbonTable, List<String> staleSegmentFiles,
-      List<String> redundantSegmentFile) {
+      List<String> redundantSegmentFile) throws IOException {
     String segmentFilesLocation =
         CarbonTablePath.getSegmentFilesLocation(carbonTable.getTablePath());
     List<String> segmentFiles = Arrays.stream(FileFactory.getCarbonFile(segmentFilesLocation)
         .listFiles()).map(CarbonFile::getName).collect(Collectors.toList());
     // there are no segments present in the Metadata folder. Can return here
-    if (segmentFiles.size() == 0) {
-      return;
+    // if table status file does not exist return
+    try {
+      if (segmentFiles.size() == 0 || !FileFactory.isFileExist(CarbonTablePath

Review comment:
       Maybe it is not required to check tablestatus here.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org