You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2018/11/30 16:33:55 UTC

[02/26] carbondata git commit: [CARBONDATA-3134] fixed null values when cachelevel is set as blocklet

[CARBONDATA-3134] fixed null values when cachelevel is set as blocklet

Problem:
For each blocklet, an object of SegmentPropertiesAndSchemaHolder is created to store the schema used for the query. This object is created only if no other blocklet has the same schema. To check the schema we compare List<ColumnSchema>. Because the equals method in ColumnSchema does not check columnUniqueId, this comparison wrongly treats the two schemas as equal, and the new restructured blocklet ends up using the schema of the old blocklet. As a result, the newly added column is ignored, because the old blocklet's schema marks that column as deleted (alter drop).

Solution:
Instead of checking equality through ColumnSchema's existing equals and hashCode, add stricter implementations of both that also take columnUniqueId into account, and use them when comparing the column lists.

This closes #2956


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/4e8b35c5
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/4e8b35c5
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/4e8b35c5

Branch: refs/heads/branch-1.5
Commit: 4e8b35c5957617c07130467adf15b5f3458c8afd
Parents: 6eed17c
Author: kunal642 <ku...@gmail.com>
Authored: Tue Nov 27 14:13:27 2018 +0530
Committer: ravipesala <ra...@gmail.com>
Committed: Fri Nov 30 21:55:29 2018 +0530

----------------------------------------------------------------------
 .../block/SegmentPropertiesAndSchemaHolder.java | 40 ++++++++++++++++++--
 .../schema/table/column/ColumnSchema.java       |  4 ++
 .../StandardPartitionTableQueryTestCase.scala   |  2 +-
 3 files changed, 42 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/4e8b35c5/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
index 1b7e1f8..6f9a93d 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/block/SegmentPropertiesAndSchemaHolder.java
@@ -18,6 +18,8 @@ package org.apache.carbondata.core.datastore.block;
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
@@ -332,13 +334,45 @@ public class SegmentPropertiesAndSchemaHolder {
       }
       SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper other =
           (SegmentPropertiesAndSchemaHolder.SegmentPropertiesWrapper) obj;
-      return tableIdentifier.equals(other.tableIdentifier) && columnsInTable
-          .equals(other.columnsInTable) && Arrays
+      return tableIdentifier.equals(other.tableIdentifier) && checkColumnSchemaEquality(
+          columnsInTable, other.columnsInTable) && Arrays
           .equals(columnCardinality, other.columnCardinality);
     }
 
+    // Order-insensitive strict comparison of two column lists; null or size mismatch is false.
+    private boolean checkColumnSchemaEquality(List<ColumnSchema> obj1, List<ColumnSchema> obj2) {
+      if (obj1 == null || obj2 == null || (obj1.size() != obj2.size())) {
+        return false;
+      }
+      // The copy constructor already copies every element. An additional addAll() would
+      // duplicate each entry, so after sorting only the first half of the distinct columns
+      // (ordered by columnUniqueId) would be compared. Sort copies to leave the inputs intact.
+      List<ColumnSchema> clonedObj1 = new ArrayList<>(obj1);
+      List<ColumnSchema> clonedObj2 = new ArrayList<>(obj2);
+      sortList(clonedObj1);
+      sortList(clonedObj2);
+      for (int i = 0; i < clonedObj1.size(); i++) {
+        if (!clonedObj1.get(i).equalsWithStrictCheck(clonedObj2.get(i))) {
+          return false;
+        }
+      }
+      return true;
+    }
+
+    private void sortList(List<ColumnSchema> columnSchemas) { // sorts the given list in place
+      Collections.sort(columnSchemas, new Comparator<ColumnSchema>() {
+        @Override public int compare(ColumnSchema o1, ColumnSchema o2) { // by columnUniqueId
+          return o1.getColumnUniqueId().compareTo(o2.getColumnUniqueId());
+        }
+      });
+    }
+
     @Override public int hashCode() {
-      return tableIdentifier.hashCode() + columnsInTable.hashCode() + Arrays
+      int allColumnsHashCode = 0; // plain sum, so the result does not depend on column order
+      for (ColumnSchema columnSchema: columnsInTable) {
+        allColumnsHashCode = allColumnsHashCode + columnSchema.strictHashCode();
+      }
+      return tableIdentifier.hashCode() + allColumnsHashCode + Arrays
           .hashCode(columnCardinality);
     }
 

http://git-wip-us.apache.org/repos/asf/carbondata/blob/4e8b35c5/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
index cf7ecab..0606cbd 100644
--- a/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
+++ b/core/src/main/java/org/apache/carbondata/core/metadata/schema/table/column/ColumnSchema.java
@@ -297,6 +297,10 @@ public class ColumnSchema implements Serializable, Writable {
     return result;
   }
 
+  public int strictHashCode() { // hashCode() plus the columnUniqueId and encodingList hashes
+    return hashCode() + columnUniqueId.hashCode() + encodingList.hashCode();
+  }
+
   /**
    * Overridden equals method for columnSchema
    */

http://git-wip-us.apache.org/repos/asf/carbondata/blob/4e8b35c5/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
index 8107cd5..c7957c1 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/spark/testsuite/standardpartition/StandardPartitionTableQueryTestCase.scala
@@ -440,7 +440,7 @@ test("Creation of partition table should fail if the colname in table schema and
   test("validate data in partition table after dropping and adding a column") {
     sql("drop table if exists par")
     sql("create table par(name string) partitioned by (age double) stored by " +
-              "'carbondata'")
+              "'carbondata' TBLPROPERTIES('cache_level'='blocklet')")
     sql(s"load data local inpath '$resourcesPath/uniqwithoutheader.csv' into table par options" +
         s"('header'='false')")
     sql("alter table par drop columns(name)")