Posted to commits@carbondata.apache.org by ku...@apache.org on 2018/11/14 04:43:16 UTC

carbondata git commit: [CARBONDATA-3084] Fix data load failure when float value exceeds the limit

Repository: carbondata
Updated Branches:
  refs/heads/master 07943cec0 -> 4de60509d


[CARBONDATA-3084] Fix data load failure when float value exceeds the limit

Problem:
When a float value exceeds the float range and we try to insert that data, the data load fails. For example, inserting 1.7976931348623157E308 (Double.MAX_VALUE, far beyond Float.MAX_VALUE) into a float column fails, as the test case added below shows.

Analysis:
When the value exceeds the float range, the max statistic is set to Infinity and the decimal count of that value is 0. A decimal count of zero sends codec selection down the selectCodecByAlgorithmForIntegral path, which fails for Infinity.
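
For illustration, here is a minimal standalone Java sketch (not CarbonData
code) of the behaviour described above: a literal beyond Float.MAX_VALUE
parses to Infinity, and since "Infinity" has no fractional digits, a naive
string-based decimal counter reports 0:

    public class FloatOverflowDemo {
      public static void main(String[] args) {
        // 1.7976931348623157E308 is Double.MAX_VALUE, far beyond Float.MAX_VALUE (~3.4E38)
        float f = Float.parseFloat("1.7976931348623157E308");
        System.out.println(f);                    // Infinity
        System.out.println(Float.isInfinite(f));  // true
        // "Infinity" contains no '.', so counting digits after the decimal
        // point yields 0, the condition that previously sent selection down
        // the integral codec path.
        String s = String.valueOf(f);
        int decimalCount = s.contains(".") ? s.length() - s.indexOf('.') - 1 : 0;
        System.out.println(decimalCount);         // 0
      }
    }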

Solution:
When the value exceeds the range, the decimal count is zero, and the source data type is float, select DirectCompressCodec instead of the integral codec.
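
In outline, the fix routes the FLOAT/decimal-count-zero case through the same
floating-point selection helper used for fractional values (a sketch abridged
from the DefaultEncodingFactory diff below, not the verbatim patch):

    if (srcDataType == DataTypes.FLOAT && decimalCount == 0) {
      // The max may have overflowed to Infinity, making decimalCount == 0
      // misleading: take the floating-point selection path instead of the
      // integral one. There, Math.pow(10, 0) * Infinity > Long.MAX_VALUE,
      // so the page falls through to DirectCompressCodec(DataTypes.DOUBLE).
      return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType,
          maxValue, minValue, decimalCount, absMaxValue);
    } else if (decimalCount == 0) {
      // genuinely integral values: short, int, long
      return selectCodecByAlgorithmForIntegral(stats, false, columnSpec);
    } else if (decimalCount < 0 && !isComplexPrimitive) {
      return new DirectCompressCodec(DataTypes.DOUBLE);
    } else {
      return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType,
          maxValue, minValue, decimalCount, absMaxValue);
    }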

This closes #2903


Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/4de60509
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/4de60509
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/4de60509

Branch: refs/heads/master
Commit: 4de60509df0d60415c6e965d8756b945d6a88103
Parents: 07943ce
Author: akashrn5 <ak...@gmail.com>
Authored: Tue Nov 6 12:36:24 2018 +0530
Committer: kunal642 <ku...@gmail.com>
Committed: Wed Nov 14 10:02:08 2018 +0530

----------------------------------------------------------------------
 .../page/encoding/DefaultEncodingFactory.java   | 50 ++++++++++++--------
 .../primitiveTypes/FloatDataTypeTestCase.scala  | 14 ++++++
 2 files changed, 44 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/carbondata/blob/4de60509/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
index 146d5dd..506e1c7 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/DefaultEncodingFactory.java
@@ -325,32 +325,42 @@ public class DefaultEncodingFactory extends EncodingFactory {
     //Here we should use the Max abs as max to getDatatype, let's say -1 and -10000000, -1 is max,
     //but we can't use -1 to getDatatype, we should use -10000000.
     double absMaxValue = Math.max(Math.abs(maxValue), Math.abs(minValue));
-    if (decimalCount == 0) {
+    if (srcDataType == DataTypes.FLOAT && decimalCount == 0) {
+      return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType, maxValue,
+          minValue, decimalCount, absMaxValue);
+    } else if (decimalCount == 0) {
       // short, int, long
       return selectCodecByAlgorithmForIntegral(stats, false, columnSpec);
     } else if (decimalCount < 0 && !isComplexPrimitive) {
       return new DirectCompressCodec(DataTypes.DOUBLE);
     } else {
-      // double
-      // If absMaxValue exceeds LONG.MAX_VALUE, then go for direct compression
-      if ((Math.pow(10, decimalCount) * absMaxValue) > Long.MAX_VALUE) {
-        return new DirectCompressCodec(DataTypes.DOUBLE);
+      return getColumnPageCodec(stats, isComplexPrimitive, columnSpec, srcDataType, maxValue,
+          minValue, decimalCount, absMaxValue);
+    }
+  }
+
+  private static ColumnPageCodec getColumnPageCodec(SimpleStatsResult stats,
+      boolean isComplexPrimitive, TableSpec.ColumnSpec columnSpec, DataType srcDataType,
+      double maxValue, double minValue, int decimalCount, double absMaxValue) {
+    // double
+    // If absMaxValue exceeds LONG.MAX_VALUE, then go for direct compression
+    if ((Math.pow(10, decimalCount) * absMaxValue) > Long.MAX_VALUE) {
+      return new DirectCompressCodec(DataTypes.DOUBLE);
+    } else {
+      long max = (long) (Math.pow(10, decimalCount) * absMaxValue);
+      DataType adaptiveDataType = fitLongMinMax(max, 0);
+      DataType deltaDataType = compareMinMaxAndSelectDataType(
+          (long) (Math.pow(10, decimalCount) * (maxValue - minValue)));
+      if (adaptiveDataType.getSizeInBytes() > deltaDataType.getSizeInBytes()) {
+        return new AdaptiveDeltaFloatingCodec(srcDataType, deltaDataType, stats,
+            isInvertedIndex(isComplexPrimitive, columnSpec));
+      } else if (adaptiveDataType.getSizeInBytes() < DataTypes.DOUBLE.getSizeInBytes() || (
+          (isComplexPrimitive) && (adaptiveDataType.getSizeInBytes() == DataTypes.DOUBLE
+              .getSizeInBytes()))) {
+        return new AdaptiveFloatingCodec(srcDataType, adaptiveDataType, stats,
+            isInvertedIndex(isComplexPrimitive, columnSpec));
       } else {
-        long max = (long) (Math.pow(10, decimalCount) * absMaxValue);
-        DataType adaptiveDataType = fitLongMinMax(max, 0);
-        DataType deltaDataType = compareMinMaxAndSelectDataType(
-            (long) (Math.pow(10, decimalCount) * (maxValue - minValue)));
-        if (adaptiveDataType.getSizeInBytes() > deltaDataType.getSizeInBytes()) {
-          return new AdaptiveDeltaFloatingCodec(srcDataType, deltaDataType, stats,
-              isInvertedIndex(isComplexPrimitive, columnSpec));
-        } else if (adaptiveDataType.getSizeInBytes() < DataTypes.DOUBLE.getSizeInBytes() || (
-            (isComplexPrimitive) && (adaptiveDataType.getSizeInBytes() == DataTypes.DOUBLE
-                .getSizeInBytes()))) {
-          return new AdaptiveFloatingCodec(srcDataType, adaptiveDataType, stats,
-              isInvertedIndex(isComplexPrimitive, columnSpec));
-        } else {
-          return new DirectCompressCodec(DataTypes.DOUBLE);
-        }
+        return new DirectCompressCodec(DataTypes.DOUBLE);
       }
     }
   }

http://git-wip-us.apache.org/repos/asf/carbondata/blob/4de60509/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
----------------------------------------------------------------------
diff --git a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
index 076ef2c..9574cd5 100644
--- a/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
+++ b/integration/spark-common-test/src/test/scala/org/apache/carbondata/integration/spark/testsuite/primitiveTypes/FloatDataTypeTestCase.scala
@@ -26,6 +26,8 @@ import org.scalatest.BeforeAndAfterAll
 class FloatDataTypeTestCase extends QueryTest with BeforeAndAfterAll {
 
   override def beforeAll {
+    sql("DROP TABLE IF EXISTS datatype_float_hive")
+    sql("DROP TABLE IF EXISTS datatype_float_byte")
     sql("DROP TABLE IF EXISTS tfloat")
     sql("""
            CREATE TABLE IF NOT EXISTS tfloat
@@ -57,7 +59,19 @@ class FloatDataTypeTestCase extends QueryTest with BeforeAndAfterAll {
       Seq(Row(24.56)))
   }
 
+  test("test when float range exceeds") {
+    sql("create table datatype_float_hive(f float, b byte)")
+    sql("insert into datatype_float_hive select 1.7976931348623157E308,-127")
+    sql("create table datatype_float_byte(f float, b byte) using carbon")
+    sql("insert into datatype_float_byte select 1.7976931348623157E308,-127")
+    checkAnswer(
+      sql("SELECT f FROM datatype_float_byte"),
+      sql("SELECT f FROM datatype_float_hive"))
+  }
+
   override def afterAll {
     sql("DROP TABLE IF EXISTS tfloat")
+    sql("DROP TABLE IF EXISTS datatype_float_byte")
+    sql("DROP TABLE IF EXISTS datatype_float_hive")
   }
 }
\ No newline at end of file