You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2018/06/22 01:34:48 UTC

[47/50] [abbrv] carbondata git commit: [CARBONDATA-2615][32K] Support page size less than 32000 in CarbondataV3

[CARBONDATA-2615][32K] Support page size less than 32000 in CarbondataV3

Since we support super long string, if it is long enough, a column page
with 32000 rows will exceed 2GB, so we support a page less than 32000
rows.

This closes #2383


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/091a28bf
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/091a28bf
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/091a28bf

Branch: refs/heads/carbonstore
Commit: 091a28bf833a5296dd3878ddb11b243f7f37a8fc
Parents: 2ea3b2d
Author: xuchuanyin <xu...@hust.edu.cn>
Authored: Wed Jun 20 19:07:03 2018 +0800
Committer: kumarvishal09 <ku...@gmail.com>
Committed: Thu Jun 21 11:00:02 2018 +0530

----------------------------------------------------------------------
 .../testsuite/dataload/TestLoadDataGeneral.scala    | 16 ++++++++++++++++
 .../store/CarbonFactDataHandlerColumnar.java        |  7 ++++++-
 2 files changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
index 688928f..8b51090 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/dataload/TestLoadDataGeneral.scala
@@ -259,6 +259,22 @@ class TestLoadDataGeneral extends QueryTest with BeforeAndAfterEach {
       CarbonLoadOptionConstants.ENABLE_CARBON_LOAD_DIRECT_WRITE_HDFS,
       originStatus)
   }
+
+  test("test data loading with page size less than 32000") {
+    CarbonProperties.getInstance().addProperty(
+      CarbonCommonConstants.BLOCKLET_SIZE, "16000")
+
+    val testData = s"$resourcesPath/sample.csv"
+    sql(s"LOAD DATA LOCAL INPATH '$testData' into table loadtest")
+    checkAnswer(
+      sql("SELECT COUNT(*) FROM loadtest"),
+      Seq(Row(6))
+    )
+
+    CarbonProperties.getInstance().addProperty(CarbonCommonConstants.BLOCKLET_SIZE,
+      CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL)
+  }
+
   override def afterEach {
     sql("DROP TABLE if exists loadtest")
     sql("drop table if exists invalidMeasures")

http://git-wip-us.apache.org/repos/asf/carbondata/blob/091a28bf/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
index c0acadd..5fe3261 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/store/CarbonFactDataHandlerColumnar.java
@@ -371,8 +371,13 @@ public class CarbonFactDataHandlerColumnar implements CarbonFactHandler {
     this.pageSize = Integer.parseInt(CarbonProperties.getInstance()
         .getProperty(CarbonCommonConstants.BLOCKLET_SIZE,
             CarbonCommonConstants.BLOCKLET_SIZE_DEFAULT_VAL));
+    // support less than 32000 rows in one page, because we support super long string,
+    // if it is long enough, a clomun page with 32000 rows will exceed 2GB
     if (version == ColumnarFormatVersion.V3) {
-      this.pageSize = CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
+      this.pageSize =
+          pageSize < CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT ?
+              pageSize :
+              CarbonV3DataFormatConstants.NUMBER_OF_ROWS_PER_BLOCKLET_COLUMN_PAGE_DEFAULT;
     }
     LOGGER.info("Number of rows per column blocklet " + pageSize);
     dataRows = new ArrayList<>(this.pageSize);