You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ku...@apache.org on 2018/06/21 05:30:52 UTC
carbondata git commit: [CARBONDATA-2615][32K] Support page size less
than 32000 in CarbondataV3
Repository: carbondata
Updated Branches:
refs/heads/master 2ea3b2dc5 -> 091a28bf8
[CARBONDATA-2615][32K] Support page size less than 32000 in CarbondataV3
Because CarbonData supports very long string values, a column page holding
32000 rows of sufficiently long strings can exceed 2GB, so pages with fewer
than 32000 rows are now supported.
This closes #2383
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/091a28bf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/091a28bf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/091a28bf
Branch: refs/heads/master
Commit: 091a28bf833a5296dd3878ddb11b243f7f37a8fc
Parents: 2ea3b2d
Author: xuchuanyin <xu...@hust.edu.cn>
Authored: Wed Jun 20 19:07:03 2018 +0800
Committer: kumarvishal09 <ku...@gmail.com>
Committed: Thu Jun 21 11:00:02 2018 +0530
----------------------------------------------------------------------
.../testsuite/dataload/TestLoadDataGeneral.scala | 16 ++++++++++++++++
.../store/CarbonFactDataHandlerColumnar.java | 7 ++++++-
2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
index 688928f..8b51090 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
@@ -259,6 +259,22 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS,
originStatus)
}
+
+ test("test data loading with page size less than 32000") {
+ CarbonProperties.getInstance().addProperty(
+ CarbonCommonConstants.BLOCKLET_SIZE, "16000")
+
+ val testData = s"$resourcesPath/sample.csv"
+ sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
+ checkAnswer(
+ sql("SELECT COUNT(*) FROM loadtest"),
+ Seq(Row(6))
+ )
+
+ CarbonProperties.getInstance().addProperty(CarbonCommonConstants.BLOCKLET_SIZE,
+ CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL)
+ }
+
override def afterEach {
sql("DROP TABLE if exists loadtest")
sql("drop table if exists invalidMeasures")
http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index c0acadd..5fe3261 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -371,8 +371,13 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler {
this.pageSize = Integer.parseInt(CarbonProperties.getInstance()
.getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
+ // support less than 32000 rows in one page, because we support super long string,
+ // if it is long enough, a column page with 32000 rows will exceed 2GB
if (version == ColumnarFormatVersion.V3) {
- this.pageSize = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
+ this.pageSize =
+ pageSize < CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT ?
+ pageSize :
+ CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
}
LOGGER.info("Number of rows per column blocklet " + pageSize);
dataRows = new ArrayList<>(this.pageSize);