You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2020/07/24 05:42:34 UTC

[carbondata] branch master updated: [CARBONDATA-3918] Fix extra data in count(*) after multiple updates and index server running

This is an automated email from the ASF dual-hosted git repository.

kunalkapoor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 3161a42  [CARBONDATA-3918] Fix extra data in count(*) after multiple updates and index server running
3161a42 is described below

commit 3161a42011902fd7ad4e2ee86ac818d2d554d77d
Author: akashrn5 <ak...@gmail.com>
AuthorDate: Mon Jul 20 11:22:50 2020 +0530

    [CARBONDATA-3918] Fix extra data in count(*) after multiple updates and index server running
    
    Why is this PR needed?
    Select count * gives extra data after multiple updates with the index server running.
    This is because, once horizontal compaction happens, it stores the index files in the cache
    and creates new index and data files, so if the table has been updated or deleted from, we
    exclude those old splits after getting all splits. However, when the splits come from the
    index server, loadMetadataDetails is null, because it is transient in the Segment object
    and the splits are serialized from the index server. Because of this, the old IUD segments
    cannot be filtered out, which leads to extra data in count(*).
    
    What changes were proposed in this PR?
    Once we get the splits from the index server, look up each split's segment in validSegments
    and set its loadMetadataDetails and readCommittedScope on the split's segment, which solves
    this problem.
    
    This closes #3853
---
 .../java/org/apache/carbondata/core/index/Segment.java   |  4 ++++
 .../carbondata/hadoop/api/CarbonTableInputFormat.java    | 16 ++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/core/src/main/java/org/apache/carbondata/core/index/Segment.java b/core/src/main/java/org/apache/carbondata/core/index/Segment.java
index e76253d..8fb22bc 100644
--- a/core/src/main/java/org/apache/carbondata/core/index/Segment.java
+++ b/core/src/main/java/org/apache/carbondata/core/index/Segment.java
@@ -309,6 +309,10 @@ public class Segment implements Serializable, Writable {
     return loadMetadataDetails;
   }
 
+  public void setLoadMetadataDetails(LoadMetadataDetails loadMetadataDetails) {
+    this.loadMetadataDetails = loadMetadataDetails;
+  }
+
   public long getIndexSize() {
     return indexSize;
   }
diff --git a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
index bd0f5d1..e61f742 100644
--- a/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
+++ b/hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java
@@ -356,6 +356,7 @@ public class CarbonTableInputFormat<T> extends CarbonInputFormat<T> {
         getDataBlocksOfSegment(job, carbonTable, expression, validSegments,
             invalidSegments, segmentsToBeRefreshed);
     numBlocks = dataBlocksOfSegment.size();
+    updateLoadMetaDataDetailsToSegments(validSegments, dataBlocksOfSegment);
     for (org.apache.carbondata.hadoop.CarbonInputSplit inputSplit : dataBlocksOfSegment) {
 
       // Get the UpdateVO for those tables on which IUD operations being performed.
@@ -386,6 +387,21 @@ public class CarbonTableInputFormat<T> extends CarbonInputFormat<T> {
     return result;
   }
 
+  public void updateLoadMetaDataDetailsToSegments(List<Segment> validSegments,
+      List<org.apache.carbondata.hadoop.CarbonInputSplit> prunedSplits) {
+    for (CarbonInputSplit split : prunedSplits) {
+      Segment segment = split.getSegment();
+      if (segment.getLoadMetadataDetails() == null || segment.getReadCommittedScope() == null) {
+        if (validSegments.contains(segment)) {
+          segment.setLoadMetadataDetails(
+              validSegments.get(validSegments.indexOf(segment)).getLoadMetadataDetails());
+          segment.setReadCommittedScope(
+              validSegments.get(validSegments.indexOf(segment)).getReadCommittedScope());
+        }
+      }
+    }
+  }
+
   /**
    * return valid segment to access
    */