You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by aj...@apache.org on 2020/04/13 15:57:41 UTC

[carbondata] branch master updated: [CARBONDATA-3751] Segments are not Marked for delete if everything is deleted in a segment with index server enabled

This is an automated email from the ASF dual-hosted git repository.

ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 638e3e4  [CARBONDATA-3751] Segments are not Marked for delete if everything is deleted in a segment with index server enabled
638e3e4 is described below

commit 638e3e4eb28b10da305c082f4acd93746eaa237d
Author: Vikram Ahuja <vi...@gmail.com>
AuthorDate: Wed Mar 25 16:56:54 2020 +0530

    [CARBONDATA-3751] Segments are not Marked for delete if everything is deleted in a segment with index server enabled
    
    Why is this PR needed?
    When all rows are deleted from a segment with the index server enabled, the segment is not marked for delete.
    
    What changes were proposed in this PR?
    The row count was always being sent as 0 from the index server's DistributedPruneRDD. That value is now obtained from the DataMapRow instead.
    
    Does this PR introduce any user interface change?
    No
    
    Is any new testcase added?
    No
    
    This closes #3679
---
 .../org/apache/carbondata/core/indexstore/ExtendedBlocklet.java     | 6 +++++-
 .../main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java    | 4 ++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java b/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java
index 37075c8..cd29e40 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java
@@ -25,6 +25,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexRowIndexes;
 import org.apache.carbondata.core.indexstore.row.IndexRow;
 import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
 import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
@@ -168,7 +169,10 @@ public class ExtendedBlocklet extends Blocklet {
       throws IOException {
     super.write(out);
     if (isCountJob) {
-      out.writeLong(inputSplit.getRowCount());
+      // In CarbonInputSplit, getDetailInfo() is a lazy call. we want to avoid this during
+      // countStar query. As rowCount is filled inside getDetailInfo(). In countStar case we may
+      // not have proper row count. So, always take row count from indexRow.
+      out.writeLong(inputSplit.getIndexRow().getInt(BlockletIndexRowIndexes.ROW_COUNT_INDEX));
       out.writeUTF(inputSplit.getSegmentId());
     } else {
       if (dataMapUniqueId == null) {
diff --git a/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java b/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
index 80eb3a9..5cfb4b1 100644
--- a/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
+++ b/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
@@ -735,6 +735,10 @@ public class CarbonInputSplit extends FileSplit
     return path;
   }
 
+  public IndexRow getIndexRow() {
+    return indexRow;
+  }
+
   public String getFilePath() {
     return this.filePath;
   }