You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/04/17 07:34:30 UTC
[carbondata] branch master updated: [CARBONDATA-3334] fixed
multiple segment file issue for partition
This is an automated email from the ASF dual-hosted git repository.
ravipesala pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git
The following commit(s) were added to refs/heads/master by this push:
new 32af97e [CARBONDATA-3334] fixed multiple segment file issue for partition
32af97e is described below
commit 32af97e9c8ba05b6073678a1f46e0dce760fa07a
Author: kunal642 <ku...@gmail.com>
AuthorDate: Thu Mar 28 14:33:45 2019 +0530
[CARBONDATA-3334] fixed multiple segment file issue for partition
Problem:
During partition load, while writing merge index files, the FactTimestamp in the load model is changed to the current timestamp, due to which a new segment file with the merge index entry is written.
Solution:
Set a new timestamp only if FactTimestamp in the load model is 0L (meaning nothing is set).
This closes #3167
---
.../standardpartition/StandardPartitionTableLoadingTestCase.scala | 8 ++++++++
.../sql/execution/command/management/CarbonLoadDataCommand.scala | 3 ++-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
index 059dd2b..bee118a 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableLoadingTestCase.scala
@@ -496,6 +496,13 @@ class StandardPartitionTableLoadingTestCase extends QueryTest with BeforeAndAfte
}
}
+ test("test number of segment files should not be more than 1 per segment") {
+ sql("drop table if exists new_par")
+ sql("create table new_par(a string) partitioned by ( b int) stored by 'carbondata'")
+ sql("insert into new_par select 'k',1")
+ assert(new File(s"$storeLocation/new_par/Metadata/segments/").listFiles().size == 1)
+ }
+
def restoreData(dblocation: String, tableName: String) = {
@@ -556,6 +563,7 @@ class StandardPartitionTableLoadingTestCase extends QueryTest with BeforeAndAfte
sql("drop table if exists emp1")
sql("drop table if exists restorepartition")
sql("drop table if exists casesensitivepartition")
+ sql("drop table if exists new_par")
}
}
diff --git a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
index 0c8a1df..b4ef1f0 100644
--- a/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
+++ b/integration/spark2/src/main/scala/org/apache/spark/sql/execution/command/management/CarbonLoadDataCommand.scala
@@ -805,6 +805,8 @@ case class CarbonLoadDataCommand(
}
if (updateModel.isDefined) {
carbonLoadModel.setFactTimeStamp(updateModel.get.updatedTimeStamp)
+ } else if (carbonLoadModel.getFactTimeStamp == 0L) {
+ carbonLoadModel.setFactTimeStamp(System.currentTimeMillis())
}
// Create and add the segment to the tablestatus.
CarbonLoaderUtil.readAndUpdateLoadProgressInTableMeta(carbonLoadModel, isOverwriteTable)
@@ -869,7 +871,6 @@ case class CarbonLoadDataCommand(
}
}
try {
- carbonLoadModel.setFactTimeStamp(System.currentTimeMillis())
val compactedSegments = new util.ArrayList[String]()
// Trigger auto compaction
CarbonDataRDDFactory.handleSegmentMerging(