Posted to commits@carbondata.apache.org by ma...@apache.org on 2018/06/13 15:05:17 UTC

carbondata git commit: [CARBONDATA-2604] Fix ArrayIndexOutOfBoundsException during compaction after IUD in cluster

Repository: carbondata
Updated Branches:
  refs/heads/master ff0364599 -> efad40d57


[CARBONDATA-2604] Fix ArrayIndexOutOfBoundsException during compaction after IUD in cluster

Issue: if some records have been deleted, the number of valid rows and the number of actually scanned rows can differ while the measure and dimension data are being filled. Filling the measure data iterates over the scanned result, so its row counter can run past the rows collected during the dimension fill and throw an ArrayIndexOutOfBoundsException.
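To make the failure mode concrete, here is a minimal, self-contained Java sketch; the names and row counts are hypothetical stand-ins, not CarbonData APIs. The dimension fill produces one slot per valid row, while the measure fill is driven by the scanned-row counter, so the index overruns the shorter array once records have been deleted:

import java.util.Arrays;

public class CounterMismatchSketch {
  public static void main(String[] args) {
    int scannedRowCount = 5;                               // rows read from the block
    boolean[] deleted = {false, true, false, true, false}; // delete-delta flags
    int validRowCount = 3;                                 // rows surviving the delete delta

    // dimension fill: one slot per *valid* row
    Object[][] rows = new Object[validRowCount][];
    int next = 0;
    for (int i = 0; i < scannedRowCount; i++) {
      if (!deleted[i]) {
        rows[next++] = new Object[] {"dim-" + i, null};
      }
    }

    // measure fill: driven by the *scanned* row counter, so it overruns the
    // 3-slot array and throws ArrayIndexOutOfBoundsException at i = 3
    for (int i = 0; i < scannedRowCount; i++) {
      rows[i][1] = "measure-" + i;
    }
    System.out.println(Arrays.deepToString(rows)); // never reached
  }
}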

Solution: inside RawBasedResultCollector, collect the measure and dimension data of each scan-and-fill pass into a new temporary list, and only then add that list to the final result list.
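Reduced to a hedged, self-contained sketch (the helper bodies and batch sizes are illustrative, not the actual CarbonData implementation), the pattern applied by the patch below looks like this: each pass fills a fresh per-batch list, so both fill steps agree on the size of exactly this batch, and the batch is appended to the accumulated result afterwards.

import java.util.ArrayList;
import java.util.List;

public class TempListPatternSketch {
  // illustrative stand-in for the dimension fill step
  static void fillDimensionData(List<Object[]> target, int batchRows) {
    for (int i = 0; i < batchRows; i++) {
      target.add(new Object[] {"dim-" + i, null});
    }
  }

  // illustrative stand-in for the measure fill step; it is sized by the list
  // that was just filled, so the counters cannot diverge
  static void fillMeasureData(List<Object[]> target) {
    for (int i = 0; i < target.size(); i++) {
      target.get(i)[1] = "measure-" + i;
    }
  }

  public static void main(String[] args) {
    List<Object[]> listBasedResult = new ArrayList<>(); // final, accumulated result
    int[] batchSizes = {3, 2};
    for (int batchRows : batchSizes) {
      // fresh per-batch list, as in the RawBasedResultCollector fix
      List<Object[]> collectedData = new ArrayList<>(batchRows);
      fillDimensionData(collectedData, batchRows);
      fillMeasureData(collectedData);
      listBasedResult.addAll(collectedData); // append the batch to the final list
    }
    System.out.println(listBasedResult.size()); // prints 5
  }
}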

This closes #2369


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/efad40d5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/efad40d5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/efad40d5

Branch: refs/heads/master
Commit: efad40d5723849a351ec700e8e4e346cac8c3454
Parents: ff03645
Author: rahul <ra...@knoldus.in>
Authored: Tue Jun 12 19:26:40 2018 +0530
Committer: manishgupta88 <to...@gmail.com>
Committed: Wed Jun 13 20:38:24 2018 +0530

----------------------------------------------------------------------
 .../collector/impl/RawBasedResultCollector.java  | 12 +++++++++---
 .../sdv/generated/DataLoadingIUDTestCase.scala   | 19 +++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
index d28df0a..7302b2c 100644
--- a/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
+++ b/core/src/main/java/org/apache/carbondata/core/scan/collector/impl/RawBasedResultCollector.java
@@ -92,16 +92,22 @@ public class RawBasedResultCollector extends AbstractScannedResultCollector {
         // re initialized with left over value
         batchSize = 0;
       }
+      // for every iteration over the available rows, fill a newly created list of Object[]
+      // and add it to the final list, so that there is no mismatch between the counters used
+      // while filling the dimension and measure data
+      List<Object[]> collectedData = new ArrayList<>(availableBatchRowCount);
       // fill dimension data
-      fillDimensionData(scannedResult, listBasedResult, queryMeasures, availableBatchRowCount);
-      fillMeasureData(scannedResult, listBasedResult);
+      fillDimensionData(scannedResult, collectedData, queryMeasures, availableBatchRowCount);
+      fillMeasureData(scannedResult, collectedData);
       // increment the number of rows scanned in scanned result statistics
       incrementScannedResultRowCounter(scannedResult, availableBatchRowCount);
       // assign the left over rows to batch size if the number of rows fetched are lesser
       // than batchSize
-      if (listBasedResult.size() < availableBatchRowCount) {
+      if (collectedData.size() < availableBatchRowCount) {
         batchSize += availableBatchRowCount - listBasedResult.size();
       }
+      // add the collected data to the final list
+      listBasedResult.addAll(collectedData);
     }
   }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/efad40d5/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
index 4c232be..79458f5 100644
--- a/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
+++ b/integration/spark-common-cluster-test/src/test/scala/org/apache/carbondata/cluster/sdv/generated/DataLoadingIUDTestCase.scala
@@ -3671,6 +3671,23 @@ test("HQ_Defect_TC_2016110901163", Include) {
    sql(s"""drop table default.t_carbn01  """).collect
 }
 
+  test("[CARBONDATA-2604] ", Include){
+    sql("drop table if exists brinjal").collect
+    sql("create table brinjal (imei string,AMSize string,channelsId string,ActiveCountry string, Activecity string,gamePointId double,deviceInformationId double,productionDate Timestamp,deliveryDate timestamp,deliverycharge double) STORED BY 'org.apache.carbondata.format' TBLPROPERTIES('table_blocksize'='2000','sort_columns'='imei')").collect
+    sql(s"""LOAD DATA INPATH '$resourcesPath/Data/InsertData/vardhandaterestruct.csv' INTO TABLE brinjal OPTIONS('DELIMITER'=',', 'QUOTECHAR'= '','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'= 'imei,deviceInformationId,AMSize,channelsId,ActiveCountry,Activecity,gamePointId,productionDate,deliveryDate,deliverycharge')""").collect
+    sql(s"""LOAD DATA INPATH '$resourcesPath/Data/InsertData/vardhandaterestruct.csv' INTO TABLE brinjal OPTIONS('DELIMITER'=',', 'QUOTECHAR'= '','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'= 'imei,deviceInformationId,AMSize,channelsId,ActiveCountry,Activecity,gamePointId,productionDate,deliveryDate,deliverycharge')""").collect
+    sql(s"""LOAD DATA INPATH '$resourcesPath/Data/InsertData/vardhandaterestruct.csv' INTO TABLE brinjal OPTIONS('DELIMITER'=',', 'QUOTECHAR'= '','BAD_RECORDS_ACTION'='FORCE','FILEHEADER'= 'imei,deviceInformationId,AMSize,channelsId,ActiveCountry,Activecity,gamePointId,productionDate,deliveryDate,deliverycharge')""").collect
+    sql("insert into brinjal select * from brinjal").collect
+    sql("update brinjal set (AMSize)= ('8RAM size') where AMSize='4RAM size'").collect
+    sql("delete from brinjal where AMSize='8RAM size'").collect
+    sql("delete from table brinjal where segment.id IN(0)").collect
+    sql("clean files for table brinjal").collect
+    sql("alter table brinjal compact 'minor'").collect
+    sql("alter table brinjal compact 'major'").collect
+    checkAnswer(s"""select count(*) from brinjal""",
+      Seq(Row(335)), "CARBONDATA-2604")
+    sql("drop table if exists brinjal")
+  }
 override def afterAll {
   sql("use default").collect
   sql("drop table if exists t_carbn02").collect
@@ -3701,5 +3718,7 @@ override def afterAll {
   sql("drop table if exists t_carbn01b").collect
   sql("drop table if exists T_Hive1").collect
   sql("drop table if exists T_Hive6").collect
+  sql("drop table if exists brinjal")
+
 }
 }
\ No newline at end of file