You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by qi...@apache.org on 2020/04/09 09:14:28 UTC

[carbondata] branch master updated: [CARBONDATA-3753] optimize double/float stats collector

This is an automated email from the ASF dual-hosted git repository.

qiangcai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/carbondata.git


The following commit(s) were added to refs/heads/master by this push:
     new 5666973  [CARBONDATA-3753] optimize double/float stats collector
5666973 is described below

commit 5666973c21cb415df10cd35d20e245763bb67ad1
Author: ajantha-bhat <aj...@gmail.com>
AuthorDate: Thu Mar 26 15:07:57 2020 +0530

    [CARBONDATA-3753] optimize double/float stats collector
    
    Why is this PR needed?
    For every value of a double/float column, we call
    PrimitivePageStatsCollector.getDecimalCount(double value).
    The problem is that each call creates a new BigDecimal object and a new plain-string
    object, which leads to huge memory usage during insert.
    
    What changes were proposed in this PR?
    Create only the BigDecimal object and take the decimal count from its scale.
    
    This closes #3682
---
 .../page/statistics/PrimitivePageStatsCollector.java | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
index ab885b3..190a8d1 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/statistics/PrimitivePageStatsCollector.java
@@ -233,20 +233,18 @@ public class PrimitivePageStatsCollector implements ColumnPageStatsCollector, Si
 
   /**
    * Return number of digit after decimal point
-   * TODO: it operation is costly, optimize for performance
    */
   private int getDecimalCount(double value) {
     int decimalPlaces = 0;
     try {
-      String strValue = BigDecimal.valueOf(Math.abs(value)).toPlainString();
-      int integerPlaces = strValue.indexOf('.');
-      if (-1 != integerPlaces) {
-        decimalPlaces = strValue.length() - integerPlaces - 1;
+      BigDecimal decimalValue = BigDecimal.valueOf(value);
+      decimalPlaces = decimalValue.scale();
+      if (decimalPlaces == 1) {
         // If decimal places are one and it is just zero then treat the decimal count a zero.
-        if (decimalPlaces == 1) {
-          if (strValue.substring(integerPlaces + 1, strValue.length()).equals(ZERO_STRING)) {
-            decimalPlaces = 0;
-          }
+        // note: here toString() uses stringCache of BigDecimal
+        String str = decimalValue.toString();
+        if (str.charAt(str.length() - 1) == '0') {
+          decimalPlaces = 0;
         }
       }
     } catch (NumberFormatException e) {
@@ -273,7 +271,7 @@ public class PrimitivePageStatsCollector implements ColumnPageStatsCollector, Si
       int decimalCount = getDecimalCount(value);
       decimalCountForComplexPrimitive = decimalCount;
       if (decimalCount > 5) {
-        // If deciaml count is too big, we do not do adaptive encoding.
+        // If decimal count is too big, we do not do adaptive encoding.
         // So set decimal to negative value
         decimal = -1;
       } else if (decimalCount > decimal) {
@@ -294,7 +292,7 @@ public class PrimitivePageStatsCollector implements ColumnPageStatsCollector, Si
       int decimalCount = getDecimalCount(value);
       decimalCountForComplexPrimitive = decimalCount;
       if (decimalCount > 5) {
-        // If deciaml count is too big, we do not do adaptive encoding.
+        // If decimal count is too big, we do not do adaptive encoding.
         // So set decimal to negative value
         decimal = -1;
       } else if (decimalCount > decimal) {