You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2016/08/01 10:05:26 UTC

[28/47] incubator-carbondata git commit: [CARBONDATA-95] Problem Columns values with numeric data types are not getting parsed when included in dictionary_include (#869)

[CARBONDATA-95] Problem Columns values with numeric data types are not getting parsed when included in dictionary_include (#869)

Analysis: When a numeric data type, say Decimal, is defined for a column and that column is listed in dictionary_include, the precision and scale defined by the user are not taken into consideration; every value is accepted as-is and a dictionary entry is generated for it.

Solution: Parse the value as a big decimal both while generating the global dictionary and during dictionary lookup, and apply the precision and scale specified by the user.

Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/adba5973
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/adba5973
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/adba5973

Branch: refs/heads/master
Commit: adba597364b418156c60c317fa99674dd7ad71d0
Parents: 6dfaefe
Author: manishgupta88 <to...@gmail.com>
Authored: Thu Jul 28 16:09:24 2016 +0530
Committer: Kumar Vishal <ku...@gmail.com>
Committed: Thu Jul 28 16:09:24 2016 +0530

----------------------------------------------------------------------
 .../processing/datatypes/PrimitiveDataType.java |  3 +-
 .../CarbonCSVBasedDimSurrogateKeyGen.java       | 16 +++----
 .../csvbased/CarbonCSVBasedSeqGenStep.java      | 46 +++++++++++---------
 3 files changed, 35 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/adba5973/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java b/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
index 3d272d5..d4b9b58 100644
--- a/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
+++ b/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
@@ -163,8 +163,7 @@ public class PrimitiveDataType implements GenericDataType {
       String[] delimiter, int delimiterIndex, DataOutputStream dataOutputStream,
       CarbonCSVBasedDimSurrogateKeyGen surrogateKeyGen) throws KettleException, IOException {
     String parsedValue = DataTypeUtil.parseValue(inputString,
-        surrogateKeyGen.getDimensionNameToDimensionMapping()
-            .get(tableName + CarbonCommonConstants.UNDERSCORE + name));
+        surrogateKeyGen.getDimensionOrdinalToDimensionMapping()[dimensionOrdinal]);
     Integer surrogateKey = null;
     if (null == parsedValue) {
       surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/adba5973/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
index 8fc1196..b72de8e 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
@@ -117,9 +117,9 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
       new HashMap<String, Map<ArrayWrapper, Integer>>(
           CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
   /**
-   * dimension name to dimension mapping
+   * dimension ordinal to dimension mapping
    */
-  private Map<String, CarbonDimension> dimensionNameToDimensionMapping;
+  private CarbonDimension[] dimensionOrdinalToDimensionMapping;
   /**
    * rwLock2
    */
@@ -520,15 +520,15 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
   /**
    * @return
    */
-  public Map<String, CarbonDimension> getDimensionNameToDimensionMapping() {
-    return dimensionNameToDimensionMapping;
+  public CarbonDimension[] getDimensionOrdinalToDimensionMapping() {
+    return dimensionOrdinalToDimensionMapping;
   }
 
   /**
-   * @param dimensionNameToDimensionMapping
+   * @param dimensionOrdinalToDimensionMapping
    */
-  public void setDimensionNameToDimensionMapping(
-      Map<String, CarbonDimension> dimensionNameToDimensionMapping) {
-    this.dimensionNameToDimensionMapping = dimensionNameToDimensionMapping;
+  public void setDimensionOrdinalToDimensionMapping(
+      CarbonDimension[] dimensionOrdinalToDimensionMapping) {
+    this.dimensionOrdinalToDimensionMapping = dimensionOrdinalToDimensionMapping;
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/adba5973/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
index 207e68d..d5ecb37 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
@@ -435,7 +435,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
         if (null != getInputRowMeta()) {
           generateNoDictionaryAndComplexIndexMapping();
           data.getSurrogateKeyGen()
-              .setDimensionNameToDimensionMapping(populateNameToCarbonDimensionMap());
+              .setDimensionOrdinalToDimensionMapping(populateNameToCarbonDimensionMap());
         }
         serializationNullFormat = meta.getTableOptionWrapper().get("serialization_null_format");
       }
@@ -1147,10 +1147,8 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
               surrogateKeyGen.max[m] = Integer.MAX_VALUE;
 
             } else {
-              String parsedValue = DataTypeUtil.parseValue(tuple,
-                  data.getSurrogateKeyGen().getDimensionNameToDimensionMapping().get(
-                      meta.getTableName() + CarbonCommonConstants.UNDERSCORE + columnName
-                          .toLowerCase()));
+              String parsedValue = DataTypeUtil.parseValue(tuple, data.getSurrogateKeyGen()
+                  .getDimensionOrdinalToDimensionMapping()[memberMapping[i]]);
               if(null == parsedValue) {
                 surrogateKeyForHrrchy[0] = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
               } else {
@@ -1838,34 +1836,42 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
     }
   }
 
-  private Map<String, CarbonDimension> populateNameToCarbonDimensionMap() {
+  private CarbonDimension[] populateNameToCarbonDimensionMap() {
     CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
         meta.getSchemaName() + CarbonCommonConstants.UNDERSCORE + meta.getTableName());
     List<CarbonDimension> dimensionsList = carbonTable.getDimensionByTableName(meta.getTableName());
-    Map<String, CarbonDimension> dimensionNameToDimensionMapping =
-        new HashMap<>(dimensionsList.size());
-    for (CarbonDimension dimension : dimensionsList) {
+    CarbonDimension[] dimensionOrdinalToDimensionMapping =
+        new CarbonDimension[meta.getColumnSchemaDetailsWrapper().getColumnSchemaDetailsMap()
+            .size()];
+    List<CarbonDimension> dimListExcludingNoDictionaryColumn = dimensionsList;
+    if (null != meta.getNoDictionaryDims() && meta.getNoDictionaryDims().length() > 0) {
+      dimListExcludingNoDictionaryColumn =
+          new ArrayList<>(dimensionsList.size() - meta.noDictionaryCols.length);
+      for (CarbonDimension dimension : dimensionsList) {
+        if (!dimension.getEncoder().isEmpty()) {
+          dimListExcludingNoDictionaryColumn.add(dimension);
+        }
+      }
+    }
+    for (int i = 0; i < dimListExcludingNoDictionaryColumn.size(); i++) {
+      CarbonDimension dimension = dimListExcludingNoDictionaryColumn.get(meta.memberMapping[i]);
       if (dimension.isComplex()) {
-        populateComplexDimension(dimensionNameToDimensionMapping, dimension);
+        populateComplexDimension(dimensionOrdinalToDimensionMapping, dimension);
       } else {
-        dimensionNameToDimensionMapping.put(
-            meta.getTableName() + CarbonCommonConstants.UNDERSCORE + dimension.getColName()
-                .toLowerCase(), dimension);
+        dimensionOrdinalToDimensionMapping[meta.memberMapping[i]] = dimension;
       }
     }
-    return dimensionNameToDimensionMapping;
+    return dimensionOrdinalToDimensionMapping;
   }
 
-  private void populateComplexDimension(
-      Map<String, CarbonDimension> dimensionNameToDimensionMapping, CarbonDimension dimension) {
+  private void populateComplexDimension(CarbonDimension[] dimensionOrdinalToDimensionMapping,
+      CarbonDimension dimension) {
     List<CarbonDimension> listOfChildDimensions = dimension.getListOfChildDimensions();
     for (CarbonDimension childDimension : listOfChildDimensions) {
       if (childDimension.isComplex()) {
-        populateComplexDimension(dimensionNameToDimensionMapping, childDimension);
+        populateComplexDimension(dimensionOrdinalToDimensionMapping, childDimension);
       } else {
-        dimensionNameToDimensionMapping.put(
-            meta.getTableName() + CarbonCommonConstants.UNDERSCORE + childDimension.getColName(),
-            childDimension);
+        dimensionOrdinalToDimensionMapping[childDimension.getOrdinal()] = childDimension;
       }
     }
   }