You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by aj...@apache.org on 2020/04/13 15:57:41 UTC
[carbondata] branch master updated: [CARBONDATA-3751] Segments are
not Marked for delete if everything is deleted in a segment with index
server enabled
This is an automated email from the ASF dual-hosted git repository.
ajantha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 638e3e4 [CARBONDATA-3751] Segments are not Marked for delete if everything is deleted in a segment with index server enabled
638e3e4 is described below
commit 638e3e4eb28b10da305c082f4acd93746eaa237d
Author: Vikram Ahuja <vi...@gmail.com>
AuthorDate: Wed Mar 25 16:56:54 2020 +0530
[CARBONDATA-3751] Segments are not Marked for delete if everything is deleted in a segment with index server enabled
Why is this PR needed?
When all the rows are deleted from a segment with the index server enabled, the segment is not marked for delete.
What changes were proposed in this PR?
The row count was always being sent as 0 from the index server's DistributedPruneRDD. That value is now obtained from the DataMapRow instead.
Does this PR introduce any user interface change?
No
Is any new testcase added?
No
This closes #3679
---
.../org/apache/carbondata/core/indexstore/ExtendedBlocklet.java | 6 +++++-
.../main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java | 4 ++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java b/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java
index 37075c8..cd29e40 100644
--- a/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java
+++ b/core/src/main/java/org/apache/carbondata/core/indexstore/ExtendedBlocklet.java
@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Map;
import org.apache.carbondata.core.datamap.Segment;
+import org.apache.carbondata.core.indexstore.blockletindex.BlockletIndexRowIndexes;
import org.apache.carbondata.core.indexstore.row.IndexRow;
import org.apache.carbondata.core.metadata.ColumnarFormatVersion;
import org.apache.carbondata.core.metadata.schema.table.column.ColumnSchema;
@@ -168,7 +169,10 @@ public class ExtendedBlocklet extends Blocklet {
throws IOException {
super.write(out);
if (isCountJob) {
- out.writeLong(inputSplit.getRowCount());
+ // In CarbonInputSplit, getDetailInfo() is a lazy call. we want to avoid this during
+ // countStar query. As rowCount is filled inside getDetailInfo(). In countStar case we may
+ // not have proper row count. So, always take row count from indexRow.
+ out.writeLong(inputSplit.getIndexRow().getInt(BlockletIndexRowIndexes.ROW_COUNT_INDEX));
out.writeUTF(inputSplit.getSegmentId());
} else {
if (dataMapUniqueId == null) {
diff --git a/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java b/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
index 80eb3a9..5cfb4b1 100644
--- a/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
+++ b/core/src/main/java/org/apache/carbondata/hadoop/CarbonInputSplit.java
@@ -735,6 +735,10 @@ public class CarbonInputSplit extends FileSplit
return path;
}
+ public IndexRow getIndexRow() {
+ return indexRow;
+ }
+
public String getFilePath() {
return this.filePath;
}